; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn < %s | FileCheck --check-prefixes=SI,SI-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn < %s | FileCheck --check-prefixes=SI,SI-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tonga < %s | FileCheck --check-prefixes=VI,VI-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tonga < %s | FileCheck --check-prefixes=VI,VI-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck --check-prefixes=GFX9,GFX9-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck --check-prefixes=GFX9,GFX9-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10,GFX10-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10,GFX10-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck --check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck --check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck --check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck --check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s

; Test that add/sub with a constant is swapped to sub/add with the negated
; constant to minimize code size.
17 18define amdgpu_kernel void @v_test_i32_x_sub_64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 19; SI-SDAG-LABEL: v_test_i32_x_sub_64: 20; SI-SDAG: ; %bb.0: 21; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 22; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 23; SI-SDAG-NEXT: s_mov_b32 s6, 0 24; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 25; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 26; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 27; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 28; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 29; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 30; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 31; SI-SDAG-NEXT: v_subrev_i32_e32 v2, vcc, 64, v2 32; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 33; SI-SDAG-NEXT: s_endpgm 34; 35; SI-GISEL-LABEL: v_test_i32_x_sub_64: 36; SI-GISEL: ; %bb.0: 37; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 38; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 39; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 40; SI-GISEL-NEXT: s_mov_b32 s6, 0 41; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 42; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 43; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 44; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 45; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 46; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 0xffffffc0, v2 47; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 48; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 49; SI-GISEL-NEXT: s_endpgm 50; 51; VI-SDAG-LABEL: v_test_i32_x_sub_64: 52; VI-SDAG: ; %bb.0: 53; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 54; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 55; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 56; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 57; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 58; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 59; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] 60; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 61; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 62; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 63; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 64; VI-SDAG-NEXT: 
v_subrev_u32_e32 v2, vcc, 64, v3 65; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 66; VI-SDAG-NEXT: s_endpgm 67; 68; VI-GISEL-LABEL: v_test_i32_x_sub_64: 69; VI-GISEL: ; %bb.0: 70; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 71; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 72; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 73; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 74; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 75; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 76; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 77; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] 78; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 79; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 80; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 81; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 82; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 83; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, 0xffffffc0, v3 84; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 85; VI-GISEL-NEXT: s_endpgm 86; 87; GFX9-SDAG-LABEL: v_test_i32_x_sub_64: 88; GFX9-SDAG: ; %bb.0: 89; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 90; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 91; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 92; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3] 93; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 94; GFX9-SDAG-NEXT: v_subrev_u32_e32 v1, 64, v1 95; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1] 96; GFX9-SDAG-NEXT: s_endpgm 97; 98; GFX9-GISEL-LABEL: v_test_i32_x_sub_64: 99; GFX9-GISEL: ; %bb.0: 100; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 101; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 102; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 103; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3] 104; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 105; GFX9-GISEL-NEXT: v_add_u32_e32 v1, 0xffffffc0, v1 106; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1] 107; GFX9-GISEL-NEXT: s_endpgm 108; 109; GFX10-SDAG-LABEL: v_test_i32_x_sub_64: 110; GFX10-SDAG: ; %bb.0: 111; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 112; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 113; 
GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) 114; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3] 115; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 116; GFX10-SDAG-NEXT: v_subrev_nc_u32_e32 v1, 64, v1 117; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1] 118; GFX10-SDAG-NEXT: s_endpgm 119; 120; GFX10-GISEL-LABEL: v_test_i32_x_sub_64: 121; GFX10-GISEL: ; %bb.0: 122; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 123; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 124; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) 125; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3] 126; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) 127; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 0xffffffc0, v1 128; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1] 129; GFX10-GISEL-NEXT: s_endpgm 130; 131; GFX11-SDAG-LABEL: v_test_i32_x_sub_64: 132; GFX11-SDAG: ; %bb.0: 133; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 134; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 135; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 136; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 137; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 138; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] 139; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 140; GFX11-SDAG-NEXT: v_subrev_nc_u32_e32 v1, 64, v1 141; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] 142; GFX11-SDAG-NEXT: s_endpgm 143; 144; GFX11-GISEL-LABEL: v_test_i32_x_sub_64: 145; GFX11-GISEL: ; %bb.0: 146; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 147; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 148; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 149; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 150; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 151; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3] 152; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 153; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, 0xffffffc0, v1 154; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] 155; GFX11-GISEL-NEXT: s_endpgm 156 %tid = call i32 @llvm.amdgcn.workitem.id.x() 157 %tid.ext = sext i32 %tid to i64 158 %gep = getelementptr 
inbounds i32, ptr addrspace(1) %in, i64 %tid.ext 159 %gep.out = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext 160 %x = load i32, ptr addrspace(1) %gep 161 %result = sub i32 %x, 64 162 store i32 %result, ptr addrspace(1) %gep.out 163 ret void 164} 165 166define amdgpu_kernel void @v_test_i32_x_sub_64_multi_use(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 167; SI-SDAG-LABEL: v_test_i32_x_sub_64_multi_use: 168; SI-SDAG: ; %bb.0: 169; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 170; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 171; SI-SDAG-NEXT: s_mov_b32 s6, 0 172; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 173; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 174; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 175; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 176; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 glc 177; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 178; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 glc 179; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 180; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 181; SI-SDAG-NEXT: v_subrev_i32_e32 v2, vcc, 64, v2 182; SI-SDAG-NEXT: v_subrev_i32_e32 v3, vcc, 64, v3 183; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 184; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 185; SI-SDAG-NEXT: buffer_store_dword v3, v[0:1], s[0:3], 0 addr64 186; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 187; SI-SDAG-NEXT: s_endpgm 188; 189; SI-GISEL-LABEL: v_test_i32_x_sub_64_multi_use: 190; SI-GISEL: ; %bb.0: 191; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 192; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 193; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 194; SI-GISEL-NEXT: s_mov_b32 s6, 0 195; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 196; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 197; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 198; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 glc 199; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 200; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 glc 201; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 202; SI-GISEL-NEXT: s_mov_b64 s[2:3], 
s[6:7] 203; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 0xffffffc0, v2 204; SI-GISEL-NEXT: v_add_i32_e32 v3, vcc, 0xffffffc0, v3 205; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 206; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 207; SI-GISEL-NEXT: buffer_store_dword v3, v[0:1], s[0:3], 0 addr64 208; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 209; SI-GISEL-NEXT: s_endpgm 210; 211; VI-SDAG-LABEL: v_test_i32_x_sub_64_multi_use: 212; VI-SDAG: ; %bb.0: 213; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 214; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 215; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 216; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 217; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 218; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 219; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] glc 220; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 221; VI-SDAG-NEXT: flat_load_dword v4, v[0:1] glc 222; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 223; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 224; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 225; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 226; VI-SDAG-NEXT: v_subrev_u32_e32 v2, vcc, 64, v3 227; VI-SDAG-NEXT: v_subrev_u32_e32 v3, vcc, 64, v4 228; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 229; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 230; VI-SDAG-NEXT: flat_store_dword v[0:1], v3 231; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 232; VI-SDAG-NEXT: s_endpgm 233; 234; VI-GISEL-LABEL: v_test_i32_x_sub_64_multi_use: 235; VI-GISEL: ; %bb.0: 236; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 237; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 238; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 239; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 240; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 241; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 242; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 243; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] glc 244; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 245; VI-GISEL-NEXT: flat_load_dword v4, v[0:1] glc 246; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 247; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 248; 
VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 249; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 250; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 251; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, 0xffffffc0, v3 252; VI-GISEL-NEXT: v_add_u32_e32 v3, vcc, 0xffffffc0, v4 253; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 254; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 255; VI-GISEL-NEXT: flat_store_dword v[0:1], v3 256; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 257; VI-GISEL-NEXT: s_endpgm 258; 259; GFX9-SDAG-LABEL: v_test_i32_x_sub_64_multi_use: 260; GFX9-SDAG: ; %bb.0: 261; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 262; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 263; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 264; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3] glc 265; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 266; GFX9-SDAG-NEXT: global_load_dword v2, v0, s[2:3] glc 267; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 268; GFX9-SDAG-NEXT: v_subrev_u32_e32 v1, 64, v1 269; GFX9-SDAG-NEXT: v_subrev_u32_e32 v2, 64, v2 270; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1] 271; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 272; GFX9-SDAG-NEXT: global_store_dword v0, v2, s[0:1] 273; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 274; GFX9-SDAG-NEXT: s_endpgm 275; 276; GFX9-GISEL-LABEL: v_test_i32_x_sub_64_multi_use: 277; GFX9-GISEL: ; %bb.0: 278; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 279; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 280; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 281; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3] glc 282; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 283; GFX9-GISEL-NEXT: global_load_dword v2, v0, s[2:3] glc 284; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 285; GFX9-GISEL-NEXT: v_add_u32_e32 v1, 0xffffffc0, v1 286; GFX9-GISEL-NEXT: v_add_u32_e32 v2, 0xffffffc0, v2 287; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1] 288; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 289; GFX9-GISEL-NEXT: global_store_dword v0, v2, s[0:1] 290; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 291; GFX9-GISEL-NEXT: s_endpgm 292; 293; 
GFX10-SDAG-LABEL: v_test_i32_x_sub_64_multi_use: 294; GFX10-SDAG: ; %bb.0: 295; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 296; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 297; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) 298; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3] glc dlc 299; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 300; GFX10-SDAG-NEXT: global_load_dword v2, v0, s[2:3] glc dlc 301; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 302; GFX10-SDAG-NEXT: v_subrev_nc_u32_e32 v1, 64, v1 303; GFX10-SDAG-NEXT: v_subrev_nc_u32_e32 v2, 64, v2 304; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1] 305; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 306; GFX10-SDAG-NEXT: global_store_dword v0, v2, s[0:1] 307; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 308; GFX10-SDAG-NEXT: s_endpgm 309; 310; GFX10-GISEL-LABEL: v_test_i32_x_sub_64_multi_use: 311; GFX10-GISEL: ; %bb.0: 312; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 313; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 314; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) 315; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3] glc dlc 316; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) 317; GFX10-GISEL-NEXT: global_load_dword v2, v0, s[2:3] glc dlc 318; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) 319; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 0xffffffc0, v1 320; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, 0xffffffc0, v2 321; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1] 322; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 323; GFX10-GISEL-NEXT: global_store_dword v0, v2, s[0:1] 324; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 325; GFX10-GISEL-NEXT: s_endpgm 326; 327; GFX11-SDAG-LABEL: v_test_i32_x_sub_64_multi_use: 328; GFX11-SDAG: ; %bb.0: 329; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 330; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 331; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 332; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 333; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 334; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc 335; 
GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 336; GFX11-SDAG-NEXT: global_load_b32 v2, v0, s[2:3] glc dlc 337; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 338; GFX11-SDAG-NEXT: v_subrev_nc_u32_e32 v1, 64, v1 339; GFX11-SDAG-NEXT: v_subrev_nc_u32_e32 v2, 64, v2 340; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] dlc 341; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 342; GFX11-SDAG-NEXT: global_store_b32 v0, v2, s[0:1] dlc 343; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 344; GFX11-SDAG-NEXT: s_endpgm 345; 346; GFX11-GISEL-LABEL: v_test_i32_x_sub_64_multi_use: 347; GFX11-GISEL: ; %bb.0: 348; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 349; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 350; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 351; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 352; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 353; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc 354; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 355; GFX11-GISEL-NEXT: global_load_b32 v2, v0, s[2:3] glc dlc 356; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 357; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v1, 0xffffffc0, v1 358; GFX11-GISEL-NEXT: v_add_nc_u32_e32 v2, 0xffffffc0, v2 359; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] dlc 360; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 361; GFX11-GISEL-NEXT: global_store_b32 v0, v2, s[0:1] dlc 362; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 363; GFX11-GISEL-NEXT: s_endpgm 364 %tid = call i32 @llvm.amdgcn.workitem.id.x() 365 %tid.ext = sext i32 %tid to i64 366 %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %tid.ext 367 %gep.out = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext 368 %x = load volatile i32, ptr addrspace(1) %gep 369 %y = load volatile i32, ptr addrspace(1) %gep 370 %result0 = sub i32 %x, 64 371 %result1 = sub i32 %y, 64 372 store volatile i32 %result0, ptr addrspace(1) %gep.out 373 store volatile i32 %result1, ptr addrspace(1) %gep.out 374 ret void 375} 376 377define amdgpu_kernel void @v_test_i32_64_sub_x(ptr 
addrspace(1) %out, ptr addrspace(1) %in) #0 { 378; SI-SDAG-LABEL: v_test_i32_64_sub_x: 379; SI-SDAG: ; %bb.0: 380; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 381; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 382; SI-SDAG-NEXT: s_mov_b32 s6, 0 383; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 384; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 385; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 386; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 387; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 388; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 389; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 390; SI-SDAG-NEXT: v_sub_i32_e32 v2, vcc, 64, v2 391; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 392; SI-SDAG-NEXT: s_endpgm 393; 394; SI-GISEL-LABEL: v_test_i32_64_sub_x: 395; SI-GISEL: ; %bb.0: 396; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 397; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 398; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 399; SI-GISEL-NEXT: s_mov_b32 s6, 0 400; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 401; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 402; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 403; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 404; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 405; SI-GISEL-NEXT: v_sub_i32_e32 v2, vcc, 64, v2 406; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 407; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 408; SI-GISEL-NEXT: s_endpgm 409; 410; VI-SDAG-LABEL: v_test_i32_64_sub_x: 411; VI-SDAG: ; %bb.0: 412; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 413; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 414; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 415; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 416; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 417; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 418; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] 419; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 420; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 421; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 422; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 423; VI-SDAG-NEXT: v_sub_u32_e32 v2, vcc, 
64, v3 424; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 425; VI-SDAG-NEXT: s_endpgm 426; 427; VI-GISEL-LABEL: v_test_i32_64_sub_x: 428; VI-GISEL: ; %bb.0: 429; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 430; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 431; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 432; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 433; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 434; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 435; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 436; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] 437; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 438; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 439; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 440; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 441; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 442; VI-GISEL-NEXT: v_sub_u32_e32 v2, vcc, 64, v3 443; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 444; VI-GISEL-NEXT: s_endpgm 445; 446; GFX9-LABEL: v_test_i32_64_sub_x: 447; GFX9: ; %bb.0: 448; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 449; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 450; GFX9-NEXT: s_waitcnt lgkmcnt(0) 451; GFX9-NEXT: global_load_dword v1, v0, s[2:3] 452; GFX9-NEXT: s_waitcnt vmcnt(0) 453; GFX9-NEXT: v_sub_u32_e32 v1, 64, v1 454; GFX9-NEXT: global_store_dword v0, v1, s[0:1] 455; GFX9-NEXT: s_endpgm 456; 457; GFX10-LABEL: v_test_i32_64_sub_x: 458; GFX10: ; %bb.0: 459; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 460; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 461; GFX10-NEXT: s_waitcnt lgkmcnt(0) 462; GFX10-NEXT: global_load_dword v1, v0, s[2:3] 463; GFX10-NEXT: s_waitcnt vmcnt(0) 464; GFX10-NEXT: v_sub_nc_u32_e32 v1, 64, v1 465; GFX10-NEXT: global_store_dword v0, v1, s[0:1] 466; GFX10-NEXT: s_endpgm 467; 468; GFX11-LABEL: v_test_i32_64_sub_x: 469; GFX11: ; %bb.0: 470; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 471; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 472; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 473; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 474; GFX11-NEXT: s_waitcnt lgkmcnt(0) 475; GFX11-NEXT: 
global_load_b32 v1, v0, s[2:3] 476; GFX11-NEXT: s_waitcnt vmcnt(0) 477; GFX11-NEXT: v_sub_nc_u32_e32 v1, 64, v1 478; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 479; GFX11-NEXT: s_endpgm 480 %tid = call i32 @llvm.amdgcn.workitem.id.x() 481 %tid.ext = sext i32 %tid to i64 482 %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %tid.ext 483 %gep.out = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext 484 %x = load i32, ptr addrspace(1) %gep 485 %result = sub i32 64, %x 486 store i32 %result, ptr addrspace(1) %gep.out 487 ret void 488} 489 490define amdgpu_kernel void @v_test_i32_x_sub_65(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 491; SI-SDAG-LABEL: v_test_i32_x_sub_65: 492; SI-SDAG: ; %bb.0: 493; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 494; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 495; SI-SDAG-NEXT: s_mov_b32 s6, 0 496; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 497; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 498; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 499; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 500; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 501; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 502; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 503; SI-SDAG-NEXT: v_add_i32_e32 v2, vcc, 0xffffffbf, v2 504; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 505; SI-SDAG-NEXT: s_endpgm 506; 507; SI-GISEL-LABEL: v_test_i32_x_sub_65: 508; SI-GISEL: ; %bb.0: 509; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 510; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 511; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 512; SI-GISEL-NEXT: s_mov_b32 s6, 0 513; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 514; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 515; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 516; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 517; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 518; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 0xffffffbf, v2 519; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 520; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 521; 
SI-GISEL-NEXT: s_endpgm 522; 523; VI-SDAG-LABEL: v_test_i32_x_sub_65: 524; VI-SDAG: ; %bb.0: 525; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 526; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 527; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 528; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 529; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 530; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 531; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] 532; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 533; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 534; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 535; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 536; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, 0xffffffbf, v3 537; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 538; VI-SDAG-NEXT: s_endpgm 539; 540; VI-GISEL-LABEL: v_test_i32_x_sub_65: 541; VI-GISEL: ; %bb.0: 542; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 543; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 544; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 545; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 546; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 547; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 548; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 549; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] 550; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 551; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 552; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 553; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 554; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 555; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, 0xffffffbf, v3 556; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 557; VI-GISEL-NEXT: s_endpgm 558; 559; GFX9-LABEL: v_test_i32_x_sub_65: 560; GFX9: ; %bb.0: 561; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 562; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 563; GFX9-NEXT: s_waitcnt lgkmcnt(0) 564; GFX9-NEXT: global_load_dword v1, v0, s[2:3] 565; GFX9-NEXT: s_waitcnt vmcnt(0) 566; GFX9-NEXT: v_add_u32_e32 v1, 0xffffffbf, v1 567; GFX9-NEXT: global_store_dword v0, v1, s[0:1] 568; GFX9-NEXT: s_endpgm 569; 570; GFX10-LABEL: v_test_i32_x_sub_65: 
571; GFX10: ; %bb.0: 572; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 573; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 574; GFX10-NEXT: s_waitcnt lgkmcnt(0) 575; GFX10-NEXT: global_load_dword v1, v0, s[2:3] 576; GFX10-NEXT: s_waitcnt vmcnt(0) 577; GFX10-NEXT: v_add_nc_u32_e32 v1, 0xffffffbf, v1 578; GFX10-NEXT: global_store_dword v0, v1, s[0:1] 579; GFX10-NEXT: s_endpgm 580; 581; GFX11-LABEL: v_test_i32_x_sub_65: 582; GFX11: ; %bb.0: 583; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 584; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 585; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 586; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 587; GFX11-NEXT: s_waitcnt lgkmcnt(0) 588; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] 589; GFX11-NEXT: s_waitcnt vmcnt(0) 590; GFX11-NEXT: v_add_nc_u32_e32 v1, 0xffffffbf, v1 591; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 592; GFX11-NEXT: s_endpgm 593 %tid = call i32 @llvm.amdgcn.workitem.id.x() 594 %tid.ext = sext i32 %tid to i64 595 %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %tid.ext 596 %gep.out = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext 597 %x = load i32, ptr addrspace(1) %gep 598 %result = sub i32 %x, 65 599 store i32 %result, ptr addrspace(1) %gep.out 600 ret void 601} 602 603define amdgpu_kernel void @v_test_i32_65_sub_x(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 604; SI-SDAG-LABEL: v_test_i32_65_sub_x: 605; SI-SDAG: ; %bb.0: 606; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 607; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 608; SI-SDAG-NEXT: s_mov_b32 s6, 0 609; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 610; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 611; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 612; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 613; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 614; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 615; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 616; SI-SDAG-NEXT: v_sub_i32_e32 v2, vcc, 0x41, v2 617; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 618; 
SI-SDAG-NEXT: s_endpgm 619; 620; SI-GISEL-LABEL: v_test_i32_65_sub_x: 621; SI-GISEL: ; %bb.0: 622; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 623; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 624; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 625; SI-GISEL-NEXT: s_mov_b32 s6, 0 626; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 627; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 628; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 629; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 630; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 631; SI-GISEL-NEXT: v_sub_i32_e32 v2, vcc, 0x41, v2 632; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 633; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 634; SI-GISEL-NEXT: s_endpgm 635; 636; VI-SDAG-LABEL: v_test_i32_65_sub_x: 637; VI-SDAG: ; %bb.0: 638; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 639; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 640; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 641; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 642; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 643; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 644; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] 645; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 646; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 647; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 648; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 649; VI-SDAG-NEXT: v_sub_u32_e32 v2, vcc, 0x41, v3 650; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 651; VI-SDAG-NEXT: s_endpgm 652; 653; VI-GISEL-LABEL: v_test_i32_65_sub_x: 654; VI-GISEL: ; %bb.0: 655; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 656; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 657; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 658; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 659; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 660; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 661; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 662; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] 663; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 664; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 665; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 666; 
VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 667; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 668; VI-GISEL-NEXT: v_sub_u32_e32 v2, vcc, 0x41, v3 669; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 670; VI-GISEL-NEXT: s_endpgm 671; 672; GFX9-LABEL: v_test_i32_65_sub_x: 673; GFX9: ; %bb.0: 674; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 675; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 676; GFX9-NEXT: s_waitcnt lgkmcnt(0) 677; GFX9-NEXT: global_load_dword v1, v0, s[2:3] 678; GFX9-NEXT: s_waitcnt vmcnt(0) 679; GFX9-NEXT: v_sub_u32_e32 v1, 0x41, v1 680; GFX9-NEXT: global_store_dword v0, v1, s[0:1] 681; GFX9-NEXT: s_endpgm 682; 683; GFX10-LABEL: v_test_i32_65_sub_x: 684; GFX10: ; %bb.0: 685; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 686; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 687; GFX10-NEXT: s_waitcnt lgkmcnt(0) 688; GFX10-NEXT: global_load_dword v1, v0, s[2:3] 689; GFX10-NEXT: s_waitcnt vmcnt(0) 690; GFX10-NEXT: v_sub_nc_u32_e32 v1, 0x41, v1 691; GFX10-NEXT: global_store_dword v0, v1, s[0:1] 692; GFX10-NEXT: s_endpgm 693; 694; GFX11-LABEL: v_test_i32_65_sub_x: 695; GFX11: ; %bb.0: 696; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 697; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 698; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 699; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 700; GFX11-NEXT: s_waitcnt lgkmcnt(0) 701; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] 702; GFX11-NEXT: s_waitcnt vmcnt(0) 703; GFX11-NEXT: v_sub_nc_u32_e32 v1, 0x41, v1 704; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 705; GFX11-NEXT: s_endpgm 706 %tid = call i32 @llvm.amdgcn.workitem.id.x() 707 %tid.ext = sext i32 %tid to i64 708 %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %tid.ext 709 %gep.out = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext 710 %x = load i32, ptr addrspace(1) %gep 711 %result = sub i32 65, %x 712 store i32 %result, ptr addrspace(1) %gep.out 713 ret void 714} 715 716define amdgpu_kernel void @v_test_i32_x_sub_neg16(ptr addrspace(1) %out, ptr addrspace(1) 
%in) #0 { 717; SI-SDAG-LABEL: v_test_i32_x_sub_neg16: 718; SI-SDAG: ; %bb.0: 719; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 720; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 721; SI-SDAG-NEXT: s_mov_b32 s6, 0 722; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 723; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 724; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 725; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 726; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 727; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 728; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 729; SI-SDAG-NEXT: v_add_i32_e32 v2, vcc, 16, v2 730; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 731; SI-SDAG-NEXT: s_endpgm 732; 733; SI-GISEL-LABEL: v_test_i32_x_sub_neg16: 734; SI-GISEL: ; %bb.0: 735; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 736; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 737; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 738; SI-GISEL-NEXT: s_mov_b32 s6, 0 739; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 740; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 741; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 742; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 743; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 744; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 16, v2 745; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 746; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 747; SI-GISEL-NEXT: s_endpgm 748; 749; VI-SDAG-LABEL: v_test_i32_x_sub_neg16: 750; VI-SDAG: ; %bb.0: 751; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 752; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 753; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 754; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 755; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 756; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 757; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] 758; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 759; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 760; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 761; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 762; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, 16, v3 763; VI-SDAG-NEXT: 
flat_store_dword v[0:1], v2 764; VI-SDAG-NEXT: s_endpgm 765; 766; VI-GISEL-LABEL: v_test_i32_x_sub_neg16: 767; VI-GISEL: ; %bb.0: 768; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 769; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 770; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 771; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 772; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 773; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 774; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 775; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] 776; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 777; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 778; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 779; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 780; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 781; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, 16, v3 782; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 783; VI-GISEL-NEXT: s_endpgm 784; 785; GFX9-LABEL: v_test_i32_x_sub_neg16: 786; GFX9: ; %bb.0: 787; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 788; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 789; GFX9-NEXT: s_waitcnt lgkmcnt(0) 790; GFX9-NEXT: global_load_dword v1, v0, s[2:3] 791; GFX9-NEXT: s_waitcnt vmcnt(0) 792; GFX9-NEXT: v_add_u32_e32 v1, 16, v1 793; GFX9-NEXT: global_store_dword v0, v1, s[0:1] 794; GFX9-NEXT: s_endpgm 795; 796; GFX10-LABEL: v_test_i32_x_sub_neg16: 797; GFX10: ; %bb.0: 798; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 799; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 800; GFX10-NEXT: s_waitcnt lgkmcnt(0) 801; GFX10-NEXT: global_load_dword v1, v0, s[2:3] 802; GFX10-NEXT: s_waitcnt vmcnt(0) 803; GFX10-NEXT: v_add_nc_u32_e32 v1, 16, v1 804; GFX10-NEXT: global_store_dword v0, v1, s[0:1] 805; GFX10-NEXT: s_endpgm 806; 807; GFX11-LABEL: v_test_i32_x_sub_neg16: 808; GFX11: ; %bb.0: 809; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 810; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 811; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 812; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 813; GFX11-NEXT: s_waitcnt lgkmcnt(0) 814; GFX11-NEXT: 
global_load_b32 v1, v0, s[2:3] 815; GFX11-NEXT: s_waitcnt vmcnt(0) 816; GFX11-NEXT: v_add_nc_u32_e32 v1, 16, v1 817; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 818; GFX11-NEXT: s_endpgm 819 %tid = call i32 @llvm.amdgcn.workitem.id.x() 820 %tid.ext = sext i32 %tid to i64 821 %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %tid.ext 822 %gep.out = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext 823 %x = load i32, ptr addrspace(1) %gep 824 %result = sub i32 %x, -16 825 store i32 %result, ptr addrspace(1) %gep.out 826 ret void 827} 828 829define amdgpu_kernel void @v_test_i32_neg16_sub_x(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 830; SI-SDAG-LABEL: v_test_i32_neg16_sub_x: 831; SI-SDAG: ; %bb.0: 832; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 833; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 834; SI-SDAG-NEXT: s_mov_b32 s6, 0 835; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 836; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 837; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 838; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 839; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 840; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 841; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 842; SI-SDAG-NEXT: v_sub_i32_e32 v2, vcc, -16, v2 843; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 844; SI-SDAG-NEXT: s_endpgm 845; 846; SI-GISEL-LABEL: v_test_i32_neg16_sub_x: 847; SI-GISEL: ; %bb.0: 848; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 849; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 850; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 851; SI-GISEL-NEXT: s_mov_b32 s6, 0 852; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 853; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 854; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 855; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 856; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 857; SI-GISEL-NEXT: v_sub_i32_e32 v2, vcc, -16, v2 858; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 859; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 860; SI-GISEL-NEXT: 
s_endpgm 861; 862; VI-SDAG-LABEL: v_test_i32_neg16_sub_x: 863; VI-SDAG: ; %bb.0: 864; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 865; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 866; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 867; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 868; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 869; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 870; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] 871; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 872; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 873; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 874; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 875; VI-SDAG-NEXT: v_sub_u32_e32 v2, vcc, -16, v3 876; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 877; VI-SDAG-NEXT: s_endpgm 878; 879; VI-GISEL-LABEL: v_test_i32_neg16_sub_x: 880; VI-GISEL: ; %bb.0: 881; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 882; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 883; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 884; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 885; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 886; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 887; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 888; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] 889; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 890; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 891; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 892; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 893; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 894; VI-GISEL-NEXT: v_sub_u32_e32 v2, vcc, -16, v3 895; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 896; VI-GISEL-NEXT: s_endpgm 897; 898; GFX9-LABEL: v_test_i32_neg16_sub_x: 899; GFX9: ; %bb.0: 900; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 901; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 902; GFX9-NEXT: s_waitcnt lgkmcnt(0) 903; GFX9-NEXT: global_load_dword v1, v0, s[2:3] 904; GFX9-NEXT: s_waitcnt vmcnt(0) 905; GFX9-NEXT: v_sub_u32_e32 v1, -16, v1 906; GFX9-NEXT: global_store_dword v0, v1, s[0:1] 907; GFX9-NEXT: s_endpgm 908; 909; GFX10-LABEL: v_test_i32_neg16_sub_x: 910; GFX10: ; %bb.0: 911; 
GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 912; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 913; GFX10-NEXT: s_waitcnt lgkmcnt(0) 914; GFX10-NEXT: global_load_dword v1, v0, s[2:3] 915; GFX10-NEXT: s_waitcnt vmcnt(0) 916; GFX10-NEXT: v_sub_nc_u32_e32 v1, -16, v1 917; GFX10-NEXT: global_store_dword v0, v1, s[0:1] 918; GFX10-NEXT: s_endpgm 919; 920; GFX11-LABEL: v_test_i32_neg16_sub_x: 921; GFX11: ; %bb.0: 922; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 923; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 924; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 925; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 926; GFX11-NEXT: s_waitcnt lgkmcnt(0) 927; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] 928; GFX11-NEXT: s_waitcnt vmcnt(0) 929; GFX11-NEXT: v_sub_nc_u32_e32 v1, -16, v1 930; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 931; GFX11-NEXT: s_endpgm 932 %tid = call i32 @llvm.amdgcn.workitem.id.x() 933 %tid.ext = sext i32 %tid to i64 934 %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %tid.ext 935 %gep.out = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext 936 %x = load i32, ptr addrspace(1) %gep 937 %result = sub i32 -16, %x 938 store i32 %result, ptr addrspace(1) %gep.out 939 ret void 940} 941 942define amdgpu_kernel void @v_test_i32_x_sub_neg17(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 943; SI-SDAG-LABEL: v_test_i32_x_sub_neg17: 944; SI-SDAG: ; %bb.0: 945; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 946; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 947; SI-SDAG-NEXT: s_mov_b32 s6, 0 948; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 949; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 950; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 951; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 952; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 953; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 954; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 955; SI-SDAG-NEXT: v_add_i32_e32 v2, vcc, 17, v2 956; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 957; SI-SDAG-NEXT: s_endpgm 958; 959; 
SI-GISEL-LABEL: v_test_i32_x_sub_neg17: 960; SI-GISEL: ; %bb.0: 961; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 962; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 963; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 964; SI-GISEL-NEXT: s_mov_b32 s6, 0 965; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 966; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 967; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 968; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 969; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 970; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 17, v2 971; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 972; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 973; SI-GISEL-NEXT: s_endpgm 974; 975; VI-SDAG-LABEL: v_test_i32_x_sub_neg17: 976; VI-SDAG: ; %bb.0: 977; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 978; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 979; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 980; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 981; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 982; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 983; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] 984; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 985; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 986; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 987; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 988; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, 17, v3 989; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 990; VI-SDAG-NEXT: s_endpgm 991; 992; VI-GISEL-LABEL: v_test_i32_x_sub_neg17: 993; VI-GISEL: ; %bb.0: 994; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 995; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 996; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 997; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 998; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 999; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 1000; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1001; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] 1002; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 1003; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 1004; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 1005; VI-GISEL-NEXT: 
v_addc_u32_e32 v1, vcc, 0, v1, vcc 1006; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 1007; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, 17, v3 1008; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 1009; VI-GISEL-NEXT: s_endpgm 1010; 1011; GFX9-LABEL: v_test_i32_x_sub_neg17: 1012; GFX9: ; %bb.0: 1013; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1014; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1015; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1016; GFX9-NEXT: global_load_dword v1, v0, s[2:3] 1017; GFX9-NEXT: s_waitcnt vmcnt(0) 1018; GFX9-NEXT: v_add_u32_e32 v1, 17, v1 1019; GFX9-NEXT: global_store_dword v0, v1, s[0:1] 1020; GFX9-NEXT: s_endpgm 1021; 1022; GFX10-LABEL: v_test_i32_x_sub_neg17: 1023; GFX10: ; %bb.0: 1024; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1025; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1026; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1027; GFX10-NEXT: global_load_dword v1, v0, s[2:3] 1028; GFX10-NEXT: s_waitcnt vmcnt(0) 1029; GFX10-NEXT: v_add_nc_u32_e32 v1, 17, v1 1030; GFX10-NEXT: global_store_dword v0, v1, s[0:1] 1031; GFX10-NEXT: s_endpgm 1032; 1033; GFX11-LABEL: v_test_i32_x_sub_neg17: 1034; GFX11: ; %bb.0: 1035; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1036; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 1037; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1038; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1039; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1040; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] 1041; GFX11-NEXT: s_waitcnt vmcnt(0) 1042; GFX11-NEXT: v_add_nc_u32_e32 v1, 17, v1 1043; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 1044; GFX11-NEXT: s_endpgm 1045 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1046 %tid.ext = sext i32 %tid to i64 1047 %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %tid.ext 1048 %gep.out = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext 1049 %x = load i32, ptr addrspace(1) %gep 1050 %result = sub i32 %x, -17 1051 store i32 %result, ptr addrspace(1) %gep.out 1052 ret void 1053} 1054 1055define amdgpu_kernel void @v_test_i32_neg17_sub_x(ptr 
addrspace(1) %out, ptr addrspace(1) %in) #0 { 1056; SI-SDAG-LABEL: v_test_i32_neg17_sub_x: 1057; SI-SDAG: ; %bb.0: 1058; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1059; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 1060; SI-SDAG-NEXT: s_mov_b32 s6, 0 1061; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1062; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 1063; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1064; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 1065; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 1066; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 1067; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 1068; SI-SDAG-NEXT: v_sub_i32_e32 v2, vcc, 0xffffffef, v2 1069; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 1070; SI-SDAG-NEXT: s_endpgm 1071; 1072; SI-GISEL-LABEL: v_test_i32_neg17_sub_x: 1073; SI-GISEL: ; %bb.0: 1074; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1075; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1076; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 1077; SI-GISEL-NEXT: s_mov_b32 s6, 0 1078; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 1079; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1080; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 1081; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 1082; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 1083; SI-GISEL-NEXT: v_sub_i32_e32 v2, vcc, 0xffffffef, v2 1084; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 1085; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 1086; SI-GISEL-NEXT: s_endpgm 1087; 1088; VI-SDAG-LABEL: v_test_i32_neg17_sub_x: 1089; VI-SDAG: ; %bb.0: 1090; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1091; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 1092; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1093; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 1094; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 1095; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1096; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] 1097; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 1098; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 1099; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1100; 
VI-SDAG-NEXT: s_waitcnt vmcnt(0) 1101; VI-SDAG-NEXT: v_sub_u32_e32 v2, vcc, 0xffffffef, v3 1102; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 1103; VI-SDAG-NEXT: s_endpgm 1104; 1105; VI-GISEL-LABEL: v_test_i32_neg17_sub_x: 1106; VI-GISEL: ; %bb.0: 1107; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1108; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 1109; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1110; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 1111; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 1112; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 1113; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1114; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] 1115; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 1116; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 1117; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 1118; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1119; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 1120; VI-GISEL-NEXT: v_sub_u32_e32 v2, vcc, 0xffffffef, v3 1121; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 1122; VI-GISEL-NEXT: s_endpgm 1123; 1124; GFX9-LABEL: v_test_i32_neg17_sub_x: 1125; GFX9: ; %bb.0: 1126; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1127; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1128; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1129; GFX9-NEXT: global_load_dword v1, v0, s[2:3] 1130; GFX9-NEXT: s_waitcnt vmcnt(0) 1131; GFX9-NEXT: v_sub_u32_e32 v1, 0xffffffef, v1 1132; GFX9-NEXT: global_store_dword v0, v1, s[0:1] 1133; GFX9-NEXT: s_endpgm 1134; 1135; GFX10-LABEL: v_test_i32_neg17_sub_x: 1136; GFX10: ; %bb.0: 1137; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1138; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1139; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1140; GFX10-NEXT: global_load_dword v1, v0, s[2:3] 1141; GFX10-NEXT: s_waitcnt vmcnt(0) 1142; GFX10-NEXT: v_sub_nc_u32_e32 v1, 0xffffffef, v1 1143; GFX10-NEXT: global_store_dword v0, v1, s[0:1] 1144; GFX10-NEXT: s_endpgm 1145; 1146; GFX11-LABEL: v_test_i32_neg17_sub_x: 1147; GFX11: ; %bb.0: 1148; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1149; GFX11-NEXT: 
v_and_b32_e32 v0, 0x3ff, v0 1150; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1151; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1152; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1153; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] 1154; GFX11-NEXT: s_waitcnt vmcnt(0) 1155; GFX11-NEXT: v_sub_nc_u32_e32 v1, 0xffffffef, v1 1156; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 1157; GFX11-NEXT: s_endpgm 1158 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1159 %tid.ext = sext i32 %tid to i64 1160 %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %tid.ext 1161 %gep.out = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext 1162 %x = load i32, ptr addrspace(1) %gep 1163 %result = sub i32 -17, %x 1164 store i32 %result, ptr addrspace(1) %gep.out 1165 ret void 1166} 1167 1168define amdgpu_kernel void @s_test_i32_x_sub_64(i32 %x) #0 { 1169; SI-LABEL: s_test_i32_x_sub_64: 1170; SI: ; %bb.0: 1171; SI-NEXT: s_load_dword s0, s[4:5], 0x9 1172; SI-NEXT: s_waitcnt lgkmcnt(0) 1173; SI-NEXT: s_sub_i32 s0, s0, 64 1174; SI-NEXT: ;;#ASMSTART 1175; SI-NEXT: ; use s0 1176; SI-NEXT: ;;#ASMEND 1177; SI-NEXT: s_endpgm 1178; 1179; VI-LABEL: s_test_i32_x_sub_64: 1180; VI: ; %bb.0: 1181; VI-NEXT: s_load_dword s0, s[4:5], 0x24 1182; VI-NEXT: s_waitcnt lgkmcnt(0) 1183; VI-NEXT: s_sub_i32 s0, s0, 64 1184; VI-NEXT: ;;#ASMSTART 1185; VI-NEXT: ; use s0 1186; VI-NEXT: ;;#ASMEND 1187; VI-NEXT: s_endpgm 1188; 1189; GFX9-LABEL: s_test_i32_x_sub_64: 1190; GFX9: ; %bb.0: 1191; GFX9-NEXT: s_load_dword s0, s[4:5], 0x24 1192; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1193; GFX9-NEXT: s_sub_i32 s0, s0, 64 1194; GFX9-NEXT: ;;#ASMSTART 1195; GFX9-NEXT: ; use s0 1196; GFX9-NEXT: ;;#ASMEND 1197; GFX9-NEXT: s_endpgm 1198; 1199; GFX10-LABEL: s_test_i32_x_sub_64: 1200; GFX10: ; %bb.0: 1201; GFX10-NEXT: s_load_dword s0, s[4:5], 0x24 1202; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1203; GFX10-NEXT: s_sub_i32 s0, s0, 64 1204; GFX10-NEXT: ;;#ASMSTART 1205; GFX10-NEXT: ; use s0 1206; GFX10-NEXT: ;;#ASMEND 1207; GFX10-NEXT: s_endpgm 1208; 1209; 
GFX11-LABEL: s_test_i32_x_sub_64: 1210; GFX11: ; %bb.0: 1211; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x24 1212; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1213; GFX11-NEXT: s_sub_i32 s0, s0, 64 1214; GFX11-NEXT: ;;#ASMSTART 1215; GFX11-NEXT: ; use s0 1216; GFX11-NEXT: ;;#ASMEND 1217; GFX11-NEXT: s_endpgm 1218 %result = sub i32 %x, 64 1219 call void asm sideeffect "; use $0", "s"(i32 %result) 1220 ret void 1221} 1222 1223define amdgpu_kernel void @v_test_i16_x_sub_64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 1224; SI-SDAG-LABEL: v_test_i16_x_sub_64: 1225; SI-SDAG: ; %bb.0: 1226; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1227; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 1228; SI-SDAG-NEXT: s_mov_b32 s6, 0 1229; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1230; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 1231; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1232; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 1233; SI-SDAG-NEXT: buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 1234; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 1235; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 1236; SI-SDAG-NEXT: v_subrev_i32_e32 v2, vcc, 64, v2 1237; SI-SDAG-NEXT: buffer_store_short v2, v[0:1], s[0:3], 0 addr64 1238; SI-SDAG-NEXT: s_endpgm 1239; 1240; SI-GISEL-LABEL: v_test_i16_x_sub_64: 1241; SI-GISEL: ; %bb.0: 1242; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1243; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1244; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 1245; SI-GISEL-NEXT: s_mov_b32 s6, 0 1246; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 1247; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1248; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 1249; SI-GISEL-NEXT: buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 1250; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 1251; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 0xffffffc0, v2 1252; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 1253; SI-GISEL-NEXT: buffer_store_short v2, v[0:1], s[0:3], 0 addr64 1254; SI-GISEL-NEXT: s_endpgm 1255; 1256; VI-SDAG-LABEL: v_test_i16_x_sub_64: 1257; VI-SDAG: ; %bb.0: 1258; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], 
s[4:5], 0x24 1259; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 1, v0 1260; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1261; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 1262; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 1263; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1264; VI-SDAG-NEXT: flat_load_ushort v3, v[0:1] 1265; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 1266; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 1267; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1268; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 1269; VI-SDAG-NEXT: v_subrev_u16_e32 v2, 64, v3 1270; VI-SDAG-NEXT: flat_store_short v[0:1], v2 1271; VI-SDAG-NEXT: s_endpgm 1272; 1273; VI-GISEL-LABEL: v_test_i16_x_sub_64: 1274; VI-GISEL: ; %bb.0: 1275; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1276; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 1, v0 1277; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1278; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 1279; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 1280; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 1281; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1282; VI-GISEL-NEXT: flat_load_ushort v3, v[0:1] 1283; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 1284; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 1285; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 1286; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1287; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 1288; VI-GISEL-NEXT: v_add_u16_e32 v2, 0xffc0, v3 1289; VI-GISEL-NEXT: flat_store_short v[0:1], v2 1290; VI-GISEL-NEXT: s_endpgm 1291; 1292; GFX9-SDAG-LABEL: v_test_i16_x_sub_64: 1293; GFX9-SDAG: ; %bb.0: 1294; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1295; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1296; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1297; GFX9-SDAG-NEXT: global_load_ushort v1, v0, s[2:3] 1298; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1299; GFX9-SDAG-NEXT: v_subrev_u16_e32 v1, 64, v1 1300; GFX9-SDAG-NEXT: global_store_short v0, v1, s[0:1] 1301; GFX9-SDAG-NEXT: s_endpgm 1302; 1303; GFX9-GISEL-LABEL: v_test_i16_x_sub_64: 1304; GFX9-GISEL: ; %bb.0: 1305; GFX9-GISEL-NEXT: 
s_load_dwordx4 s[0:3], s[4:5], 0x24 1306; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1307; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1308; GFX9-GISEL-NEXT: global_load_ushort v1, v0, s[2:3] 1309; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 1310; GFX9-GISEL-NEXT: v_add_u16_e32 v1, 0xffc0, v1 1311; GFX9-GISEL-NEXT: global_store_short v0, v1, s[0:1] 1312; GFX9-GISEL-NEXT: s_endpgm 1313; 1314; GFX10-SDAG-LABEL: v_test_i16_x_sub_64: 1315; GFX10-SDAG: ; %bb.0: 1316; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1317; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1318; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1319; GFX10-SDAG-NEXT: global_load_ushort v1, v0, s[2:3] 1320; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 1321; GFX10-SDAG-NEXT: v_sub_nc_u16 v1, v1, 64 1322; GFX10-SDAG-NEXT: global_store_short v0, v1, s[0:1] 1323; GFX10-SDAG-NEXT: s_endpgm 1324; 1325; GFX10-GISEL-LABEL: v_test_i16_x_sub_64: 1326; GFX10-GISEL: ; %bb.0: 1327; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1328; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1329; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1330; GFX10-GISEL-NEXT: global_load_ushort v1, v0, s[2:3] 1331; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) 1332; GFX10-GISEL-NEXT: v_add_nc_u16 v1, 0xffc0, v1 1333; GFX10-GISEL-NEXT: global_store_short v0, v1, s[0:1] 1334; GFX10-GISEL-NEXT: s_endpgm 1335; 1336; GFX11-SDAG-TRUE16-LABEL: v_test_i16_x_sub_64: 1337; GFX11-SDAG-TRUE16: ; %bb.0: 1338; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1339; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 1340; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) 1341; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 1342; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) 1343; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] 1344; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) 1345; GFX11-SDAG-TRUE16-NEXT: v_sub_nc_u16 v0.l, v0.l, 64 1346; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] 1347; GFX11-SDAG-TRUE16-NEXT: s_endpgm 1348; 1349; 
GFX11-SDAG-FAKE16-LABEL: v_test_i16_x_sub_64: 1350; GFX11-SDAG-FAKE16: ; %bb.0: 1351; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1352; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 1353; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) 1354; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1355; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) 1356; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] 1357; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) 1358; GFX11-SDAG-FAKE16-NEXT: v_sub_nc_u16 v1, v1, 64 1359; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] 1360; GFX11-SDAG-FAKE16-NEXT: s_endpgm 1361; 1362; GFX11-GISEL-TRUE16-LABEL: v_test_i16_x_sub_64: 1363; GFX11-GISEL-TRUE16: ; %bb.0: 1364; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1365; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 1366; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) 1367; GFX11-GISEL-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1368; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) 1369; GFX11-GISEL-TRUE16-NEXT: global_load_u16 v1, v0, s[2:3] 1370; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) 1371; GFX11-GISEL-TRUE16-NEXT: v_add_nc_u16 v1.l, 0xffc0, v1.l 1372; GFX11-GISEL-TRUE16-NEXT: global_store_b16 v0, v1, s[0:1] 1373; GFX11-GISEL-TRUE16-NEXT: s_endpgm 1374; 1375; GFX11-GISEL-FAKE16-LABEL: v_test_i16_x_sub_64: 1376; GFX11-GISEL-FAKE16: ; %bb.0: 1377; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1378; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 1379; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) 1380; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1381; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) 1382; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] 1383; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) 1384; GFX11-GISEL-FAKE16-NEXT: v_add_nc_u16 v1, 0xffc0, v1 1385; GFX11-GISEL-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] 1386; GFX11-GISEL-FAKE16-NEXT: s_endpgm 1387 %tid = call i32 
@llvm.amdgcn.workitem.id.x() 1388 %tid.ext = sext i32 %tid to i64 1389 %gep = getelementptr inbounds i16, ptr addrspace(1) %in, i64 %tid.ext 1390 %gep.out = getelementptr inbounds i16, ptr addrspace(1) %out, i64 %tid.ext 1391 %x = load i16, ptr addrspace(1) %gep 1392 %result = sub i16 %x, 64 1393 store i16 %result, ptr addrspace(1) %gep.out 1394 ret void 1395} 1396 1397define amdgpu_kernel void @v_test_i16_x_sub_64_zext_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 1398; SI-SDAG-LABEL: v_test_i16_x_sub_64_zext_to_i32: 1399; SI-SDAG: ; %bb.0: 1400; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1401; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 1402; SI-SDAG-NEXT: s_mov_b32 s6, 0 1403; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 1, v0 1404; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0 1405; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1406; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 1407; SI-SDAG-NEXT: buffer_load_ushort v3, v[1:2], s[4:7], 0 addr64 1408; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 1409; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 2, v0 1410; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 1411; SI-SDAG-NEXT: v_subrev_i32_e32 v0, vcc, 64, v3 1412; SI-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 1413; SI-SDAG-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 1414; SI-SDAG-NEXT: s_endpgm 1415; 1416; SI-GISEL-LABEL: v_test_i16_x_sub_64_zext_to_i32: 1417; SI-GISEL: ; %bb.0: 1418; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1419; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v0 1420; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0 1421; SI-GISEL-NEXT: s_mov_b32 s6, 0 1422; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 1423; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1424; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 1425; SI-GISEL-NEXT: buffer_load_ushort v3, v[1:2], s[4:7], 0 addr64 1426; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 2, v0 1427; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 1428; SI-GISEL-NEXT: v_add_i32_e32 v0, vcc, 0xffffffc0, v3 1429; SI-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 1430; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 1431; SI-GISEL-NEXT: 
buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 1432; SI-GISEL-NEXT: s_endpgm 1433; 1434; VI-SDAG-LABEL: v_test_i16_x_sub_64_zext_to_i32: 1435; VI-SDAG: ; %bb.0: 1436; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1437; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 1, v0 1438; VI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1439; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1440; VI-SDAG-NEXT: v_mov_b32_e32 v2, s3 1441; VI-SDAG-NEXT: v_add_u32_e32 v1, vcc, s2, v1 1442; VI-SDAG-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc 1443; VI-SDAG-NEXT: flat_load_ushort v2, v[1:2] 1444; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 1445; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v0 1446; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1447; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 1448; VI-SDAG-NEXT: v_subrev_u16_e32 v2, 64, v2 1449; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 1450; VI-SDAG-NEXT: s_endpgm 1451; 1452; VI-GISEL-LABEL: v_test_i16_x_sub_64_zext_to_i32: 1453; VI-GISEL: ; %bb.0: 1454; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1455; VI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 1, v0 1456; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1457; VI-GISEL-NEXT: v_mov_b32_e32 v1, s2 1458; VI-GISEL-NEXT: v_mov_b32_e32 v2, s3 1459; VI-GISEL-NEXT: v_add_u32_e32 v1, vcc, v1, v3 1460; VI-GISEL-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc 1461; VI-GISEL-NEXT: flat_load_ushort v2, v[1:2] 1462; VI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 2, v0 1463; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 1464; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 1465; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v3 1466; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1467; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 1468; VI-GISEL-NEXT: v_add_u16_e32 v2, 0xffc0, v2 1469; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 1470; VI-GISEL-NEXT: s_endpgm 1471; 1472; GFX9-SDAG-LABEL: v_test_i16_x_sub_64_zext_to_i32: 1473; GFX9-SDAG: ; %bb.0: 1474; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1475; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v1, 1, v0 1476; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1477; 
GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1478; GFX9-SDAG-NEXT: global_load_ushort v1, v1, s[2:3] 1479; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1480; GFX9-SDAG-NEXT: v_subrev_u16_e32 v1, 64, v1 1481; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1] 1482; GFX9-SDAG-NEXT: s_endpgm 1483; 1484; GFX9-GISEL-LABEL: v_test_i16_x_sub_64_zext_to_i32: 1485; GFX9-GISEL: ; %bb.0: 1486; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1487; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v0 1488; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1489; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1490; GFX9-GISEL-NEXT: global_load_ushort v1, v1, s[2:3] 1491; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 1492; GFX9-GISEL-NEXT: v_add_u16_e32 v1, 0xffc0, v1 1493; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1] 1494; GFX9-GISEL-NEXT: s_endpgm 1495; 1496; GFX10-SDAG-LABEL: v_test_i16_x_sub_64_zext_to_i32: 1497; GFX10-SDAG: ; %bb.0: 1498; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1499; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v1, 1, v0 1500; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1501; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1502; GFX10-SDAG-NEXT: global_load_ushort v1, v1, s[2:3] 1503; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 1504; GFX10-SDAG-NEXT: v_sub_nc_u16 v1, v1, 64 1505; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 1506; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1] 1507; GFX10-SDAG-NEXT: s_endpgm 1508; 1509; GFX10-GISEL-LABEL: v_test_i16_x_sub_64_zext_to_i32: 1510; GFX10-GISEL: ; %bb.0: 1511; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1512; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v0 1513; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1514; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1515; GFX10-GISEL-NEXT: global_load_ushort v1, v1, s[2:3] 1516; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) 1517; GFX10-GISEL-NEXT: v_add_nc_u16 v1, 0xffc0, v1 1518; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 1519; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1] 1520; GFX10-GISEL-NEXT: s_endpgm 1521; 1522; 
GFX11-SDAG-TRUE16-LABEL: v_test_i16_x_sub_64_zext_to_i32: 1523; GFX11-SDAG-TRUE16: ; %bb.0: 1524; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1525; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0 1526; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) 1527; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v1 1528; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 2, v1 1529; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) 1530; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v0, v0, s[2:3] 1531; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) 1532; GFX11-SDAG-TRUE16-NEXT: v_sub_nc_u16 v0.l, v0.l, 64 1533; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) 1534; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 1535; GFX11-SDAG-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] 1536; GFX11-SDAG-TRUE16-NEXT: s_endpgm 1537; 1538; GFX11-SDAG-FAKE16-LABEL: v_test_i16_x_sub_64_zext_to_i32: 1539; GFX11-SDAG-FAKE16: ; %bb.0: 1540; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1541; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 1542; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) 1543; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 1544; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1545; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) 1546; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v1, v1, s[2:3] 1547; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) 1548; GFX11-SDAG-FAKE16-NEXT: v_sub_nc_u16 v1, v1, 64 1549; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) 1550; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 1551; GFX11-SDAG-FAKE16-NEXT: global_store_b32 v0, v1, s[0:1] 1552; GFX11-SDAG-FAKE16-NEXT: s_endpgm 1553; 1554; GFX11-GISEL-TRUE16-LABEL: v_test_i16_x_sub_64_zext_to_i32: 1555; GFX11-GISEL-TRUE16: ; %bb.0: 1556; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1557; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0 1558; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) 1559; GFX11-GISEL-TRUE16-NEXT: 
v_lshlrev_b32_e32 v0, 1, v1 1560; GFX11-GISEL-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 2, v1 1561; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) 1562; GFX11-GISEL-TRUE16-NEXT: global_load_u16 v0, v0, s[2:3] 1563; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) 1564; GFX11-GISEL-TRUE16-NEXT: v_add_nc_u16 v0.l, 0xffc0, v0.l 1565; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) 1566; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 1567; GFX11-GISEL-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] 1568; GFX11-GISEL-TRUE16-NEXT: s_endpgm 1569; 1570; GFX11-GISEL-FAKE16-LABEL: v_test_i16_x_sub_64_zext_to_i32: 1571; GFX11-GISEL-FAKE16: ; %bb.0: 1572; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1573; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 1574; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) 1575; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 1576; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1577; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) 1578; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v1, v1, s[2:3] 1579; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) 1580; GFX11-GISEL-FAKE16-NEXT: v_add_nc_u16 v1, 0xffc0, v1 1581; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) 1582; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 1583; GFX11-GISEL-FAKE16-NEXT: global_store_b32 v0, v1, s[0:1] 1584; GFX11-GISEL-FAKE16-NEXT: s_endpgm 1585 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1586 %tid.ext = sext i32 %tid to i64 1587 %gep = getelementptr inbounds i16, ptr addrspace(1) %in, i64 %tid.ext 1588 %gep.out = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext 1589 %x = load i16, ptr addrspace(1) %gep 1590 %result = sub i16 %x, 64 1591 %zext = zext i16 %result to i32 1592 store i32 %zext, ptr addrspace(1) %gep.out 1593 ret void 1594} 1595
; Multi-use variant: the same i16 address is read twice with volatile loads, 64 is subtracted from each loaded value, and both results are written to the same output element with volatile stores.
1596define amdgpu_kernel void @v_test_i16_x_sub_64_multi_use(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 1597; SI-SDAG-LABEL: v_test_i16_x_sub_64_multi_use: 1598; 
SI-SDAG: ; %bb.0: 1599; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1600; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 1601; SI-SDAG-NEXT: s_mov_b32 s6, 0 1602; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1603; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 1604; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1605; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 1606; SI-SDAG-NEXT: buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 glc 1607; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 1608; SI-SDAG-NEXT: buffer_load_ushort v3, v[0:1], s[4:7], 0 addr64 glc 1609; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 1610; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 1611; SI-SDAG-NEXT: v_subrev_i32_e32 v2, vcc, 64, v2 1612; SI-SDAG-NEXT: v_subrev_i32_e32 v3, vcc, 64, v3 1613; SI-SDAG-NEXT: buffer_store_short v2, v[0:1], s[0:3], 0 addr64 1614; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 1615; SI-SDAG-NEXT: buffer_store_short v3, v[0:1], s[0:3], 0 addr64 1616; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 1617; SI-SDAG-NEXT: s_endpgm 1618; 1619; SI-GISEL-LABEL: v_test_i16_x_sub_64_multi_use: 1620; SI-GISEL: ; %bb.0: 1621; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1622; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1623; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 1624; SI-GISEL-NEXT: s_mov_b32 s6, 0 1625; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 1626; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1627; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 1628; SI-GISEL-NEXT: buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 glc 1629; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 1630; SI-GISEL-NEXT: buffer_load_ushort v3, v[0:1], s[4:7], 0 addr64 glc 1631; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 1632; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 1633; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 0xffffffc0, v2 1634; SI-GISEL-NEXT: v_add_i32_e32 v3, vcc, 0xffffffc0, v3 1635; SI-GISEL-NEXT: buffer_store_short v2, v[0:1], s[0:3], 0 addr64 1636; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 1637; SI-GISEL-NEXT: buffer_store_short v3, v[0:1], s[0:3], 0 addr64 1638; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 1639; SI-GISEL-NEXT: s_endpgm 1640; 1641; 
VI-SDAG-LABEL: v_test_i16_x_sub_64_multi_use: 1642; VI-SDAG: ; %bb.0: 1643; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1644; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 1, v0 1645; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1646; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 1647; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 1648; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1649; VI-SDAG-NEXT: flat_load_ushort v3, v[0:1] glc 1650; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 1651; VI-SDAG-NEXT: flat_load_ushort v4, v[0:1] glc 1652; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 1653; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 1654; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 1655; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1656; VI-SDAG-NEXT: v_subrev_u16_e32 v2, 64, v3 1657; VI-SDAG-NEXT: v_subrev_u16_e32 v3, 64, v4 1658; VI-SDAG-NEXT: flat_store_short v[0:1], v2 1659; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 1660; VI-SDAG-NEXT: flat_store_short v[0:1], v3 1661; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 1662; VI-SDAG-NEXT: s_endpgm 1663; 1664; VI-GISEL-LABEL: v_test_i16_x_sub_64_multi_use: 1665; VI-GISEL: ; %bb.0: 1666; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1667; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 1, v0 1668; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1669; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 1670; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 1671; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 1672; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1673; VI-GISEL-NEXT: flat_load_ushort v3, v[0:1] glc 1674; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 1675; VI-GISEL-NEXT: flat_load_ushort v4, v[0:1] glc 1676; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 1677; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 1678; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 1679; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 1680; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1681; VI-GISEL-NEXT: v_add_u16_e32 v2, 0xffc0, v3 1682; VI-GISEL-NEXT: v_add_u16_e32 v3, 0xffc0, v4 1683; VI-GISEL-NEXT: flat_store_short v[0:1], v2 1684; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 1685; VI-GISEL-NEXT: 
flat_store_short v[0:1], v3 1686; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 1687; VI-GISEL-NEXT: s_endpgm 1688; 1689; GFX9-SDAG-LABEL: v_test_i16_x_sub_64_multi_use: 1690; GFX9-SDAG: ; %bb.0: 1691; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1692; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1693; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1694; GFX9-SDAG-NEXT: global_load_ushort v1, v0, s[2:3] glc 1695; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1696; GFX9-SDAG-NEXT: global_load_ushort v2, v0, s[2:3] glc 1697; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1698; GFX9-SDAG-NEXT: v_subrev_u16_e32 v1, 64, v1 1699; GFX9-SDAG-NEXT: v_subrev_u16_e32 v2, 64, v2 1700; GFX9-SDAG-NEXT: global_store_short v0, v1, s[0:1] 1701; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1702; GFX9-SDAG-NEXT: global_store_short v0, v2, s[0:1] 1703; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1704; GFX9-SDAG-NEXT: s_endpgm 1705; 1706; GFX9-GISEL-LABEL: v_test_i16_x_sub_64_multi_use: 1707; GFX9-GISEL: ; %bb.0: 1708; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1709; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1710; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1711; GFX9-GISEL-NEXT: global_load_ushort v1, v0, s[2:3] glc 1712; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 1713; GFX9-GISEL-NEXT: global_load_ushort v2, v0, s[2:3] glc 1714; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 1715; GFX9-GISEL-NEXT: v_add_u16_e32 v1, 0xffc0, v1 1716; GFX9-GISEL-NEXT: v_add_u16_e32 v2, 0xffc0, v2 1717; GFX9-GISEL-NEXT: global_store_short v0, v1, s[0:1] 1718; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 1719; GFX9-GISEL-NEXT: global_store_short v0, v2, s[0:1] 1720; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 1721; GFX9-GISEL-NEXT: s_endpgm 1722; 1723; GFX10-SDAG-LABEL: v_test_i16_x_sub_64_multi_use: 1724; GFX10-SDAG: ; %bb.0: 1725; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1726; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1727; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1728; GFX10-SDAG-NEXT: global_load_ushort v1, v0, s[2:3] glc dlc 1729; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 1730; 
GFX10-SDAG-NEXT: global_load_ushort v2, v0, s[2:3] glc dlc 1731; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 1732; GFX10-SDAG-NEXT: v_sub_nc_u16 v1, v1, 64 1733; GFX10-SDAG-NEXT: v_sub_nc_u16 v2, v2, 64 1734; GFX10-SDAG-NEXT: global_store_short v0, v1, s[0:1] 1735; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 1736; GFX10-SDAG-NEXT: global_store_short v0, v2, s[0:1] 1737; GFX10-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 1738; GFX10-SDAG-NEXT: s_endpgm 1739; 1740; GFX10-GISEL-LABEL: v_test_i16_x_sub_64_multi_use: 1741; GFX10-GISEL: ; %bb.0: 1742; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1743; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1744; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1745; GFX10-GISEL-NEXT: global_load_ushort v1, v0, s[2:3] glc dlc 1746; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) 1747; GFX10-GISEL-NEXT: global_load_ushort v2, v0, s[2:3] glc dlc 1748; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) 1749; GFX10-GISEL-NEXT: v_add_nc_u16 v1, 0xffc0, v1 1750; GFX10-GISEL-NEXT: v_add_nc_u16 v2, 0xffc0, v2 1751; GFX10-GISEL-NEXT: global_store_short v0, v1, s[0:1] 1752; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 1753; GFX10-GISEL-NEXT: global_store_short v0, v2, s[0:1] 1754; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 1755; GFX10-GISEL-NEXT: s_endpgm 1756; 1757; GFX11-SDAG-TRUE16-LABEL: v_test_i16_x_sub_64_multi_use: 1758; GFX11-SDAG-TRUE16: ; %bb.0: 1759; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1760; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 1761; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) 1762; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 1763; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) 1764; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] glc dlc 1765; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) 1766; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] glc dlc 1767; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) 1768; GFX11-SDAG-TRUE16-NEXT: v_sub_nc_u16 v0.l, v0.l, 64 1769; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.l 
1770; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 1771; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l 1772; GFX11-SDAG-TRUE16-NEXT: v_sub_nc_u16 v0.h, v0.h, 64 1773; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) 1774; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h 1775; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v1, v2, s[0:1] dlc 1776; GFX11-SDAG-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 1777; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] dlc 1778; GFX11-SDAG-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 1779; GFX11-SDAG-TRUE16-NEXT: s_endpgm 1780; 1781; GFX11-SDAG-FAKE16-LABEL: v_test_i16_x_sub_64_multi_use: 1782; GFX11-SDAG-FAKE16: ; %bb.0: 1783; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1784; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 1785; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) 1786; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1787; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) 1788; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc 1789; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) 1790; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v2, v0, s[2:3] glc dlc 1791; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) 1792; GFX11-SDAG-FAKE16-NEXT: v_sub_nc_u16 v1, v1, 64 1793; GFX11-SDAG-FAKE16-NEXT: v_sub_nc_u16 v2, v2, 64 1794; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] dlc 1795; GFX11-SDAG-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 1796; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v0, v2, s[0:1] dlc 1797; GFX11-SDAG-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 1798; GFX11-SDAG-FAKE16-NEXT: s_endpgm 1799; 1800; GFX11-GISEL-TRUE16-LABEL: v_test_i16_x_sub_64_multi_use: 1801; GFX11-GISEL-TRUE16: ; %bb.0: 1802; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1803; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 1804; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) 1805; GFX11-GISEL-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1806; 
GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) 1807; GFX11-GISEL-TRUE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc 1808; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) 1809; GFX11-GISEL-TRUE16-NEXT: global_load_u16 v2, v0, s[2:3] glc dlc 1810; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) 1811; GFX11-GISEL-TRUE16-NEXT: v_add_nc_u16 v1.l, 0xffc0, v1.l 1812; GFX11-GISEL-TRUE16-NEXT: v_add_nc_u16 v2.l, 0xffc0, v2.l 1813; GFX11-GISEL-TRUE16-NEXT: global_store_b16 v0, v1, s[0:1] dlc 1814; GFX11-GISEL-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 1815; GFX11-GISEL-TRUE16-NEXT: global_store_b16 v0, v2, s[0:1] dlc 1816; GFX11-GISEL-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 1817; GFX11-GISEL-TRUE16-NEXT: s_endpgm 1818; 1819; GFX11-GISEL-FAKE16-LABEL: v_test_i16_x_sub_64_multi_use: 1820; GFX11-GISEL-FAKE16: ; %bb.0: 1821; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1822; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 1823; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) 1824; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1825; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) 1826; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc 1827; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) 1828; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v2, v0, s[2:3] glc dlc 1829; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) 1830; GFX11-GISEL-FAKE16-NEXT: v_add_nc_u16 v1, 0xffc0, v1 1831; GFX11-GISEL-FAKE16-NEXT: v_add_nc_u16 v2, 0xffc0, v2 1832; GFX11-GISEL-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] dlc 1833; GFX11-GISEL-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 1834; GFX11-GISEL-FAKE16-NEXT: global_store_b16 v0, v2, s[0:1] dlc 1835; GFX11-GISEL-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 1836; GFX11-GISEL-FAKE16-NEXT: s_endpgm 1837 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1838 %tid.ext = sext i32 %tid to i64 1839 %gep = getelementptr inbounds i16, ptr addrspace(1) %in, i64 %tid.ext 1840 %gep.out = getelementptr inbounds i16, ptr addrspace(1) %out, i64 %tid.ext 1841 %x = 
load volatile i16, ptr addrspace(1) %gep 1842 %y = load volatile i16, ptr addrspace(1) %gep 1843 %result0 = sub i16 %x, 64 1844 %result1 = sub i16 %y, 64 1845 store volatile i16 %result0, ptr addrspace(1) %gep.out 1846 store volatile i16 %result1, ptr addrspace(1) %gep.out 1847 ret void 1848} 1849
; Packed case: loads a 2 x i16 vector, subtracts 64 from both lanes (splat constant), and stores the result.
1850define amdgpu_kernel void @v_test_v2i16_x_sub_64_64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 1851; SI-SDAG-LABEL: v_test_v2i16_x_sub_64_64: 1852; SI-SDAG: ; %bb.0: 1853; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1854; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 1855; SI-SDAG-NEXT: s_mov_b32 s6, 0 1856; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1857; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 1858; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1859; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 1860; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 1861; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 1862; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 1863; SI-SDAG-NEXT: v_subrev_i32_e32 v3, vcc, 64, v2 1864; SI-SDAG-NEXT: s_mov_b32 s4, 0xffff0000 1865; SI-SDAG-NEXT: v_bfi_b32 v2, s4, v2, v3 1866; SI-SDAG-NEXT: v_add_i32_e32 v2, vcc, 0xffc00000, v2 1867; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 1868; SI-SDAG-NEXT: s_endpgm 1869; 1870; SI-GISEL-LABEL: v_test_v2i16_x_sub_64_64: 1871; SI-GISEL: ; %bb.0: 1872; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1873; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1874; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 1875; SI-GISEL-NEXT: s_mov_b32 s6, 0 1876; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 1877; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1878; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 1879; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 1880; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 1881; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2 1882; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 0xffffffc0, v2 1883; SI-GISEL-NEXT: v_add_i32_e32 v3, vcc, 0xffffffc0, v3 1884; SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 1885; SI-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 
1886; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 1887; SI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 1888; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 1889; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 1890; SI-GISEL-NEXT: s_endpgm 1891; 1892; VI-SDAG-LABEL: v_test_v2i16_x_sub_64_64: 1893; VI-SDAG: ; %bb.0: 1894; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1895; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 1896; VI-SDAG-NEXT: v_mov_b32_e32 v4, 64 1897; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1898; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 1899; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 1900; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1901; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] 1902; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 1903; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 1904; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1905; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 1906; VI-SDAG-NEXT: v_sub_u16_sdwa v2, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1907; VI-SDAG-NEXT: v_subrev_u16_e32 v3, 64, v3 1908; VI-SDAG-NEXT: v_or_b32_e32 v2, v3, v2 1909; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 1910; VI-SDAG-NEXT: s_endpgm 1911; 1912; VI-GISEL-LABEL: v_test_v2i16_x_sub_64_64: 1913; VI-GISEL: ; %bb.0: 1914; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1915; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 1916; VI-GISEL-NEXT: v_not_b32_e32 v4, 63 1917; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1918; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 1919; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 1920; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 1921; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1922; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] 1923; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 1924; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 1925; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 1926; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1927; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 1928; VI-GISEL-NEXT: v_add_u16_e32 v2, 0xffc0, v3 1929; VI-GISEL-NEXT: v_add_u16_sdwa v3, v3, v4 
dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1930; VI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 1931; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 1932; VI-GISEL-NEXT: s_endpgm 1933; 1934; GFX9-LABEL: v_test_v2i16_x_sub_64_64: 1935; GFX9: ; %bb.0: 1936; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1937; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1938; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1939; GFX9-NEXT: global_load_dword v1, v0, s[2:3] 1940; GFX9-NEXT: s_waitcnt vmcnt(0) 1941; GFX9-NEXT: v_pk_sub_i16 v1, v1, 64 op_sel_hi:[1,0] 1942; GFX9-NEXT: global_store_dword v0, v1, s[0:1] 1943; GFX9-NEXT: s_endpgm 1944; 1945; GFX10-LABEL: v_test_v2i16_x_sub_64_64: 1946; GFX10: ; %bb.0: 1947; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1948; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1949; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1950; GFX10-NEXT: global_load_dword v1, v0, s[2:3] 1951; GFX10-NEXT: s_waitcnt vmcnt(0) 1952; GFX10-NEXT: v_pk_sub_i16 v1, v1, 64 op_sel_hi:[1,0] 1953; GFX10-NEXT: global_store_dword v0, v1, s[0:1] 1954; GFX10-NEXT: s_endpgm 1955; 1956; GFX11-LABEL: v_test_v2i16_x_sub_64_64: 1957; GFX11: ; %bb.0: 1958; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1959; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 1960; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1961; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1962; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1963; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] 1964; GFX11-NEXT: s_waitcnt vmcnt(0) 1965; GFX11-NEXT: v_pk_sub_i16 v1, v1, 64 op_sel_hi:[1,0] 1966; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 1967; GFX11-NEXT: s_endpgm 1968 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1969 %tid.ext = sext i32 %tid to i64 1970 %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext 1971 %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext 1972 %x = load <2 x i16>, ptr addrspace(1) %gep 1973 %result = sub <2 x i16> %x, <i16 64, i16 64> 1974 store <2 x i16> %result, ptr addrspace(1) %gep.out 1975 ret 
void 1976} 1977
; Packed case with different per-lane constants: loads a 2 x i16 vector, subtracts 7 from element 0 and 64 from element 1, and stores the result.
1978define amdgpu_kernel void @v_test_v2i16_x_sub_7_64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 1979; SI-SDAG-LABEL: v_test_v2i16_x_sub_7_64: 1980; SI-SDAG: ; %bb.0: 1981; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1982; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 1983; SI-SDAG-NEXT: s_mov_b32 s6, 0 1984; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1985; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 1986; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1987; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 1988; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 1989; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 1990; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 1991; SI-SDAG-NEXT: v_add_i32_e32 v3, vcc, -7, v2 1992; SI-SDAG-NEXT: s_mov_b32 s4, 0xffff0000 1993; SI-SDAG-NEXT: v_bfi_b32 v2, s4, v2, v3 1994; SI-SDAG-NEXT: v_add_i32_e32 v2, vcc, 0xffc00000, v2 1995; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 1996; SI-SDAG-NEXT: s_endpgm 1997; 1998; SI-GISEL-LABEL: v_test_v2i16_x_sub_7_64: 1999; SI-GISEL: ; %bb.0: 2000; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 2001; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2002; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 2003; SI-GISEL-NEXT: s_mov_b32 s6, 0 2004; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 2005; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2006; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 2007; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 2008; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 2009; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2 2010; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, -7, v2 2011; SI-GISEL-NEXT: v_add_i32_e32 v3, vcc, 0xffffffc0, v3 2012; SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 2013; SI-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 2014; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 2015; SI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 2016; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 2017; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 2018; SI-GISEL-NEXT: s_endpgm 2019; 2020; VI-SDAG-LABEL: v_test_v2i16_x_sub_7_64: 2021; 
VI-SDAG: ; %bb.0: 2022; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2023; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 2024; VI-SDAG-NEXT: v_mov_b32_e32 v4, 64 2025; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2026; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 2027; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 2028; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2029; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] 2030; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 2031; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 2032; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2033; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 2034; VI-SDAG-NEXT: v_add_u16_e32 v2, -7, v3 2035; VI-SDAG-NEXT: v_sub_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2036; VI-SDAG-NEXT: v_or_b32_e32 v2, v2, v3 2037; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 2038; VI-SDAG-NEXT: s_endpgm 2039; 2040; VI-GISEL-LABEL: v_test_v2i16_x_sub_7_64: 2041; VI-GISEL: ; %bb.0: 2042; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2043; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 2044; VI-GISEL-NEXT: v_not_b32_e32 v4, 63 2045; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2046; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 2047; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 2048; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 2049; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2050; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] 2051; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 2052; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 2053; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 2054; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2055; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 2056; VI-GISEL-NEXT: v_add_u16_e32 v2, -7, v3 2057; VI-GISEL-NEXT: v_add_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2058; VI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 2059; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 2060; VI-GISEL-NEXT: s_endpgm 2061; 2062; GFX9-SDAG-LABEL: v_test_v2i16_x_sub_7_64: 2063; GFX9-SDAG: ; %bb.0: 2064; GFX9-SDAG-NEXT: 
s_load_dwordx4 s[0:3], s[4:5], 0x24 2065; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2066; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2067; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3] 2068; GFX9-SDAG-NEXT: s_mov_b32 s2, 0x400007 2069; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 2070; GFX9-SDAG-NEXT: v_pk_sub_i16 v1, v1, s2 2071; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1] 2072; GFX9-SDAG-NEXT: s_endpgm 2073; 2074; GFX9-GISEL-LABEL: v_test_v2i16_x_sub_7_64: 2075; GFX9-GISEL: ; %bb.0: 2076; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2077; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2078; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x400007 2079; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2080; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3] 2081; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 2082; GFX9-GISEL-NEXT: v_pk_sub_i16 v1, v1, v2 2083; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1] 2084; GFX9-GISEL-NEXT: s_endpgm 2085; 2086; GFX10-LABEL: v_test_v2i16_x_sub_7_64: 2087; GFX10: ; %bb.0: 2088; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2089; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2090; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2091; GFX10-NEXT: global_load_dword v1, v0, s[2:3] 2092; GFX10-NEXT: s_waitcnt vmcnt(0) 2093; GFX10-NEXT: v_pk_sub_i16 v1, v1, 0x400007 2094; GFX10-NEXT: global_store_dword v0, v1, s[0:1] 2095; GFX10-NEXT: s_endpgm 2096; 2097; GFX11-LABEL: v_test_v2i16_x_sub_7_64: 2098; GFX11: ; %bb.0: 2099; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 2100; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 2101; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2102; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2103; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2104; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] 2105; GFX11-NEXT: s_waitcnt vmcnt(0) 2106; GFX11-NEXT: v_pk_sub_i16 v1, v1, 0x400007 2107; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 2108; GFX11-NEXT: s_endpgm 2109 %tid = call i32 @llvm.amdgcn.workitem.id.x() 2110 %tid.ext = sext i32 %tid to i64 2111 %gep = getelementptr inbounds <2 x i16>, 
ptr addrspace(1) %in, i64 %tid.ext 2112 %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext 2113 %x = load <2 x i16>, ptr addrspace(1) %gep 2114 %result = sub <2 x i16> %x, <i16 7, i16 64> 2115 store <2 x i16> %result, ptr addrspace(1) %gep.out 2116 ret void 2117} 2118
; Packed case with different per-lane constants: loads a 2 x i16 vector, subtracts 64 from element 0 and 123 from element 1, and stores the result.
2119define amdgpu_kernel void @v_test_v2i16_x_sub_64_123(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 2120; SI-SDAG-LABEL: v_test_v2i16_x_sub_64_123: 2121; SI-SDAG: ; %bb.0: 2122; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 2123; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 2124; SI-SDAG-NEXT: s_mov_b32 s6, 0 2125; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2126; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 2127; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2128; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 2129; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 2130; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 2131; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 2132; SI-SDAG-NEXT: v_subrev_i32_e32 v3, vcc, 64, v2 2133; SI-SDAG-NEXT: s_mov_b32 s4, 0xffff0000 2134; SI-SDAG-NEXT: v_bfi_b32 v2, s4, v2, v3 2135; SI-SDAG-NEXT: v_add_i32_e32 v2, vcc, 0xff850000, v2 2136; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 2137; SI-SDAG-NEXT: s_endpgm 2138; 2139; SI-GISEL-LABEL: v_test_v2i16_x_sub_64_123: 2140; SI-GISEL: ; %bb.0: 2141; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 2142; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2143; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 2144; SI-GISEL-NEXT: s_mov_b32 s6, 0 2145; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 2146; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2147; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 2148; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 2149; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 2150; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2 2151; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 0xffffffc0, v2 2152; SI-GISEL-NEXT: v_add_i32_e32 v3, vcc, 0xffffff85, v3 2153; SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 2154; SI-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, 
v3 2155; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 2156; SI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 2157; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 2158; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 2159; SI-GISEL-NEXT: s_endpgm 2160; 2161; VI-SDAG-LABEL: v_test_v2i16_x_sub_64_123: 2162; VI-SDAG: ; %bb.0: 2163; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2164; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 2165; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffff85 2166; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2167; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 2168; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 2169; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2170; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] 2171; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 2172; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 2173; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2174; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 2175; VI-SDAG-NEXT: v_add_u16_sdwa v2, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2176; VI-SDAG-NEXT: v_subrev_u16_e32 v3, 64, v3 2177; VI-SDAG-NEXT: v_or_b32_e32 v2, v3, v2 2178; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 2179; VI-SDAG-NEXT: s_endpgm 2180; 2181; VI-GISEL-LABEL: v_test_v2i16_x_sub_64_123: 2182; VI-GISEL: ; %bb.0: 2183; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2184; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 2185; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffff85 2186; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2187; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 2188; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 2189; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 2190; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2191; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] 2192; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 2193; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 2194; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 2195; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2196; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 2197; VI-GISEL-NEXT: v_add_u16_e32 v2, 0xffc0, v3 2198; VI-GISEL-NEXT: 
v_add_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2199; VI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 2200; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 2201; VI-GISEL-NEXT: s_endpgm 2202; 2203; GFX9-SDAG-LABEL: v_test_v2i16_x_sub_64_123: 2204; GFX9-SDAG: ; %bb.0: 2205; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2206; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2207; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2208; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3] 2209; GFX9-SDAG-NEXT: s_mov_b32 s2, 0x7b0040 2210; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 2211; GFX9-SDAG-NEXT: v_pk_sub_i16 v1, v1, s2 2212; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1] 2213; GFX9-SDAG-NEXT: s_endpgm 2214; 2215; GFX9-GISEL-LABEL: v_test_v2i16_x_sub_64_123: 2216; GFX9-GISEL: ; %bb.0: 2217; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2218; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2219; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x7b0040 2220; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2221; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3] 2222; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 2223; GFX9-GISEL-NEXT: v_pk_sub_i16 v1, v1, v2 2224; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1] 2225; GFX9-GISEL-NEXT: s_endpgm 2226; 2227; GFX10-LABEL: v_test_v2i16_x_sub_64_123: 2228; GFX10: ; %bb.0: 2229; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2230; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2231; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2232; GFX10-NEXT: global_load_dword v1, v0, s[2:3] 2233; GFX10-NEXT: s_waitcnt vmcnt(0) 2234; GFX10-NEXT: v_pk_sub_i16 v1, v1, 0x7b0040 2235; GFX10-NEXT: global_store_dword v0, v1, s[0:1] 2236; GFX10-NEXT: s_endpgm 2237; 2238; GFX11-LABEL: v_test_v2i16_x_sub_64_123: 2239; GFX11: ; %bb.0: 2240; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 2241; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 2242; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2243; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2244; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2245; GFX11-NEXT: 
global_load_b32 v1, v0, s[2:3] 2246; GFX11-NEXT: s_waitcnt vmcnt(0) 2247; GFX11-NEXT: v_pk_sub_i16 v1, v1, 0x7b0040 2248; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 2249; GFX11-NEXT: s_endpgm 2250 %tid = call i32 @llvm.amdgcn.workitem.id.x() 2251 %tid.ext = sext i32 %tid to i64 2252 %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext 2253 %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext 2254 %x = load <2 x i16>, ptr addrspace(1) %gep 2255 %result = sub <2 x i16> %x, <i16 64, i16 123> 2256 store <2 x i16> %result, ptr addrspace(1) %gep.out 2257 ret void 2258} 2259 2260; Can fold 0 and inline immediate in other half. 2261define amdgpu_kernel void @v_test_v2i16_x_sub_7_0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 2262; SI-SDAG-LABEL: v_test_v2i16_x_sub_7_0: 2263; SI-SDAG: ; %bb.0: 2264; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 2265; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 2266; SI-SDAG-NEXT: s_mov_b32 s6, 0 2267; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2268; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 2269; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2270; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 2271; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 2272; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 2273; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 2274; SI-SDAG-NEXT: v_add_i32_e32 v3, vcc, -7, v2 2275; SI-SDAG-NEXT: s_mov_b32 s4, 0xffff 2276; SI-SDAG-NEXT: v_bfi_b32 v2, s4, v3, v2 2277; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 2278; SI-SDAG-NEXT: s_endpgm 2279; 2280; SI-GISEL-LABEL: v_test_v2i16_x_sub_7_0: 2281; SI-GISEL: ; %bb.0: 2282; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 2283; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2284; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 2285; SI-GISEL-NEXT: s_mov_b32 s6, 0 2286; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 2287; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2288; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 2289; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 
addr64 2290; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 2291; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2 2292; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, -7, v2 2293; SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 2294; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 2295; SI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 2296; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 2297; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 2298; SI-GISEL-NEXT: s_endpgm 2299; 2300; VI-SDAG-LABEL: v_test_v2i16_x_sub_7_0: 2301; VI-SDAG: ; %bb.0: 2302; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2303; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 2304; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2305; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 2306; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 2307; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2308; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] 2309; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 2310; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 2311; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2312; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 2313; VI-SDAG-NEXT: v_and_b32_e32 v2, 0xffff0000, v3 2314; VI-SDAG-NEXT: v_add_u16_e32 v3, -7, v3 2315; VI-SDAG-NEXT: v_or_b32_e32 v2, v3, v2 2316; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 2317; VI-SDAG-NEXT: s_endpgm 2318; 2319; VI-GISEL-LABEL: v_test_v2i16_x_sub_7_0: 2320; VI-GISEL: ; %bb.0: 2321; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2322; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 2323; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2324; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 2325; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 2326; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 2327; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2328; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] 2329; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 2330; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 2331; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 2332; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2333; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 2334; VI-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v3 
2335; VI-GISEL-NEXT: v_add_u16_e32 v3, -7, v3 2336; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2 2337; VI-GISEL-NEXT: v_or_b32_e32 v2, v3, v2 2338; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 2339; VI-GISEL-NEXT: s_endpgm 2340; 2341; GFX9-LABEL: v_test_v2i16_x_sub_7_0: 2342; GFX9: ; %bb.0: 2343; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2344; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2345; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2346; GFX9-NEXT: global_load_dword v1, v0, s[2:3] 2347; GFX9-NEXT: s_waitcnt vmcnt(0) 2348; GFX9-NEXT: v_pk_sub_i16 v1, v1, 7 2349; GFX9-NEXT: global_store_dword v0, v1, s[0:1] 2350; GFX9-NEXT: s_endpgm 2351; 2352; GFX10-LABEL: v_test_v2i16_x_sub_7_0: 2353; GFX10: ; %bb.0: 2354; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2355; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2356; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2357; GFX10-NEXT: global_load_dword v1, v0, s[2:3] 2358; GFX10-NEXT: s_waitcnt vmcnt(0) 2359; GFX10-NEXT: v_pk_sub_i16 v1, v1, 7 2360; GFX10-NEXT: global_store_dword v0, v1, s[0:1] 2361; GFX10-NEXT: s_endpgm 2362; 2363; GFX11-LABEL: v_test_v2i16_x_sub_7_0: 2364; GFX11: ; %bb.0: 2365; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 2366; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 2367; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2368; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2369; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2370; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] 2371; GFX11-NEXT: s_waitcnt vmcnt(0) 2372; GFX11-NEXT: v_pk_sub_i16 v1, v1, 7 2373; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 2374; GFX11-NEXT: s_endpgm 2375 %tid = call i32 @llvm.amdgcn.workitem.id.x() 2376 %tid.ext = sext i32 %tid to i64 2377 %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext 2378 %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext 2379 %x = load <2 x i16>, ptr addrspace(1) %gep 2380 %result = sub <2 x i16> %x, <i16 7, i16 0> 2381 store <2 x i16> %result, ptr addrspace(1) %gep.out 2382 ret void 2383} 2384 2385; Can fold 
0 and inline immediate in other half. 2386define amdgpu_kernel void @v_test_v2i16_x_sub_0_16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 2387; SI-SDAG-LABEL: v_test_v2i16_x_sub_0_16: 2388; SI-SDAG: ; %bb.0: 2389; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 2390; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 2391; SI-SDAG-NEXT: s_mov_b32 s6, 0 2392; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2393; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 2394; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2395; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 2396; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 2397; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 2398; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 2399; SI-SDAG-NEXT: v_add_i32_e32 v2, vcc, 0xfff00000, v2 2400; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 2401; SI-SDAG-NEXT: s_endpgm 2402; 2403; SI-GISEL-LABEL: v_test_v2i16_x_sub_0_16: 2404; SI-GISEL: ; %bb.0: 2405; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 2406; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2407; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 2408; SI-GISEL-NEXT: s_mov_b32 s6, 0 2409; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 2410; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2411; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 2412; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 2413; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 2414; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2 2415; SI-GISEL-NEXT: v_add_i32_e32 v3, vcc, -16, v3 2416; SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 2417; SI-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 2418; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 2419; SI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 2420; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 2421; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 2422; SI-GISEL-NEXT: s_endpgm 2423; 2424; VI-SDAG-LABEL: v_test_v2i16_x_sub_0_16: 2425; VI-SDAG: ; %bb.0: 2426; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2427; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 2428; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2429; 
VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 2430; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 2431; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2432; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] 2433; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 2434; VI-SDAG-NEXT: v_mov_b32_e32 v2, -16 2435; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 2436; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2437; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 2438; VI-SDAG-NEXT: v_add_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2439; VI-SDAG-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 2440; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 2441; VI-SDAG-NEXT: s_endpgm 2442; 2443; VI-GISEL-LABEL: v_test_v2i16_x_sub_0_16: 2444; VI-GISEL: ; %bb.0: 2445; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2446; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 2447; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2448; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 2449; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 2450; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 2451; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2452; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] 2453; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 2454; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 2455; VI-GISEL-NEXT: v_mov_b32_e32 v2, -16 2456; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 2457; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2458; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 2459; VI-GISEL-NEXT: v_add_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2460; VI-GISEL-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 2461; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 2462; VI-GISEL-NEXT: s_endpgm 2463; 2464; GFX9-LABEL: v_test_v2i16_x_sub_0_16: 2465; GFX9: ; %bb.0: 2466; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2467; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2468; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2469; GFX9-NEXT: 
global_load_dword v1, v0, s[2:3] 2470; GFX9-NEXT: s_waitcnt vmcnt(0) 2471; GFX9-NEXT: v_pk_sub_i16 v1, v1, 16 op_sel:[0,1] op_sel_hi:[1,0] 2472; GFX9-NEXT: global_store_dword v0, v1, s[0:1] 2473; GFX9-NEXT: s_endpgm 2474; 2475; GFX10-LABEL: v_test_v2i16_x_sub_0_16: 2476; GFX10: ; %bb.0: 2477; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2478; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2479; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2480; GFX10-NEXT: global_load_dword v1, v0, s[2:3] 2481; GFX10-NEXT: s_waitcnt vmcnt(0) 2482; GFX10-NEXT: v_pk_sub_i16 v1, v1, 16 op_sel:[0,1] op_sel_hi:[1,0] 2483; GFX10-NEXT: global_store_dword v0, v1, s[0:1] 2484; GFX10-NEXT: s_endpgm 2485; 2486; GFX11-LABEL: v_test_v2i16_x_sub_0_16: 2487; GFX11: ; %bb.0: 2488; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 2489; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 2490; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2491; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2492; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2493; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] 2494; GFX11-NEXT: s_waitcnt vmcnt(0) 2495; GFX11-NEXT: v_pk_sub_i16 v1, v1, 16 op_sel:[0,1] op_sel_hi:[1,0] 2496; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 2497; GFX11-NEXT: s_endpgm 2498 %tid = call i32 @llvm.amdgcn.workitem.id.x() 2499 %tid.ext = sext i32 %tid to i64 2500 %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext 2501 %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext 2502 %x = load <2 x i16>, ptr addrspace(1) %gep 2503 %result = sub <2 x i16> %x, <i16 0, i16 16> 2504 store <2 x i16> %result, ptr addrspace(1) %gep.out 2505 ret void 2506} 2507 2508define amdgpu_kernel void @v_test_v2i16_x_sub_0_1_0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 2509; SI-SDAG-LABEL: v_test_v2i16_x_sub_0_1_0: 2510; SI-SDAG: ; %bb.0: 2511; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 2512; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 2513; SI-SDAG-NEXT: s_mov_b32 s6, 0 2514; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2515; 
SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 2516; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2517; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 2518; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 2519; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 2520; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 2521; SI-SDAG-NEXT: v_add_i32_e32 v2, vcc, 0x3c000000, v2 2522; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 2523; SI-SDAG-NEXT: s_endpgm 2524; 2525; SI-GISEL-LABEL: v_test_v2i16_x_sub_0_1_0: 2526; SI-GISEL: ; %bb.0: 2527; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 2528; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2529; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 2530; SI-GISEL-NEXT: s_mov_b32 s6, 0 2531; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 2532; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2533; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 2534; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 2535; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 2536; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2 2537; SI-GISEL-NEXT: v_add_i32_e32 v3, vcc, 0x3c00, v3 2538; SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 2539; SI-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 2540; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 2541; SI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 2542; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 2543; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 2544; SI-GISEL-NEXT: s_endpgm 2545; 2546; VI-SDAG-LABEL: v_test_v2i16_x_sub_0_1_0: 2547; VI-SDAG: ; %bb.0: 2548; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2549; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 2550; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2551; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 2552; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 2553; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2554; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] 2555; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 2556; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x3c00 2557; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 2558; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2559; VI-SDAG-NEXT: 
s_waitcnt vmcnt(0) 2560; VI-SDAG-NEXT: v_add_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2561; VI-SDAG-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 2562; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 2563; VI-SDAG-NEXT: s_endpgm 2564; 2565; VI-GISEL-LABEL: v_test_v2i16_x_sub_0_1_0: 2566; VI-GISEL: ; %bb.0: 2567; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2568; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 2569; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2570; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 2571; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 2572; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 2573; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2574; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] 2575; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 2576; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 2577; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3c00 2578; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 2579; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2580; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 2581; VI-GISEL-NEXT: v_add_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2582; VI-GISEL-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 2583; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 2584; VI-GISEL-NEXT: s_endpgm 2585; 2586; GFX9-SDAG-LABEL: v_test_v2i16_x_sub_0_1_0: 2587; GFX9-SDAG: ; %bb.0: 2588; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2589; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2590; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2591; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3] 2592; GFX9-SDAG-NEXT: s_brev_b32 s2, 35 2593; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 2594; GFX9-SDAG-NEXT: v_pk_sub_i16 v1, v1, s2 2595; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1] 2596; GFX9-SDAG-NEXT: s_endpgm 2597; 2598; GFX9-GISEL-LABEL: v_test_v2i16_x_sub_0_1_0: 2599; GFX9-GISEL: ; %bb.0: 2600; GFX9-GISEL-NEXT: s_load_dwordx4 
s[0:3], s[4:5], 0x24 2601; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2602; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 35 2603; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2604; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3] 2605; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 2606; GFX9-GISEL-NEXT: v_pk_sub_i16 v1, v1, v2 2607; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1] 2608; GFX9-GISEL-NEXT: s_endpgm 2609; 2610; GFX10-LABEL: v_test_v2i16_x_sub_0_1_0: 2611; GFX10: ; %bb.0: 2612; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2613; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2614; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2615; GFX10-NEXT: global_load_dword v1, v0, s[2:3] 2616; GFX10-NEXT: s_waitcnt vmcnt(0) 2617; GFX10-NEXT: v_pk_sub_i16 v1, v1, 0xc4000000 2618; GFX10-NEXT: global_store_dword v0, v1, s[0:1] 2619; GFX10-NEXT: s_endpgm 2620; 2621; GFX11-LABEL: v_test_v2i16_x_sub_0_1_0: 2622; GFX11: ; %bb.0: 2623; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 2624; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 2625; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2626; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2627; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2628; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] 2629; GFX11-NEXT: s_waitcnt vmcnt(0) 2630; GFX11-NEXT: v_pk_sub_i16 v1, v1, 0xc4000000 2631; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 2632; GFX11-NEXT: s_endpgm 2633 %tid = call i32 @llvm.amdgcn.workitem.id.x() 2634 %tid.ext = sext i32 %tid to i64 2635 %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext 2636 %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext 2637 %x = load <2 x i16>, ptr addrspace(1) %gep 2638 %result = sub <2 x i16> %x, <i16 0, i16 -15360> 2639 store <2 x i16> %result, ptr addrspace(1) %gep.out 2640 ret void 2641} 2642 2643define amdgpu_kernel void @v_test_v2i16_x_sub_0_neg1_0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 2644; SI-SDAG-LABEL: v_test_v2i16_x_sub_0_neg1_0: 2645; SI-SDAG: ; %bb.0: 2646; SI-SDAG-NEXT: s_load_dwordx4 
s[0:3], s[4:5], 0x9 2647; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 2648; SI-SDAG-NEXT: s_mov_b32 s6, 0 2649; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2650; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 2651; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2652; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 2653; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 2654; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 2655; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 2656; SI-SDAG-NEXT: v_add_i32_e32 v2, vcc, 0xbc000000, v2 2657; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 2658; SI-SDAG-NEXT: s_endpgm 2659; 2660; SI-GISEL-LABEL: v_test_v2i16_x_sub_0_neg1_0: 2661; SI-GISEL: ; %bb.0: 2662; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 2663; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2664; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 2665; SI-GISEL-NEXT: s_mov_b32 s6, 0 2666; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 2667; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2668; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 2669; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 2670; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 2671; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2 2672; SI-GISEL-NEXT: v_add_i32_e32 v3, vcc, 0xffffbc00, v3 2673; SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 2674; SI-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 2675; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 2676; SI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 2677; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 2678; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 2679; SI-GISEL-NEXT: s_endpgm 2680; 2681; VI-SDAG-LABEL: v_test_v2i16_x_sub_0_neg1_0: 2682; VI-SDAG: ; %bb.0: 2683; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2684; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 2685; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2686; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 2687; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 2688; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2689; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] 2690; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 
2691; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0xffffbc00 2692; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 2693; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2694; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 2695; VI-SDAG-NEXT: v_add_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2696; VI-SDAG-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 2697; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 2698; VI-SDAG-NEXT: s_endpgm 2699; 2700; VI-GISEL-LABEL: v_test_v2i16_x_sub_0_neg1_0: 2701; VI-GISEL: ; %bb.0: 2702; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2703; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 2704; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2705; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 2706; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 2707; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 2708; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2709; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] 2710; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 2711; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 2712; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xffffbc00 2713; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 2714; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2715; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 2716; VI-GISEL-NEXT: v_add_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2717; VI-GISEL-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 2718; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 2719; VI-GISEL-NEXT: s_endpgm 2720; 2721; GFX9-SDAG-LABEL: v_test_v2i16_x_sub_0_neg1_0: 2722; GFX9-SDAG: ; %bb.0: 2723; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2724; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2725; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2726; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3] 2727; GFX9-SDAG-NEXT: s_brev_b32 s2, 34 2728; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 2729; GFX9-SDAG-NEXT: v_pk_sub_i16 v1, v1, s2 2730; GFX9-SDAG-NEXT: 
global_store_dword v0, v1, s[0:1] 2731; GFX9-SDAG-NEXT: s_endpgm 2732; 2733; GFX9-GISEL-LABEL: v_test_v2i16_x_sub_0_neg1_0: 2734; GFX9-GISEL: ; %bb.0: 2735; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2736; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2737; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 34 2738; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2739; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3] 2740; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 2741; GFX9-GISEL-NEXT: v_pk_sub_i16 v1, v1, v2 2742; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1] 2743; GFX9-GISEL-NEXT: s_endpgm 2744; 2745; GFX10-LABEL: v_test_v2i16_x_sub_0_neg1_0: 2746; GFX10: ; %bb.0: 2747; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2748; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2749; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2750; GFX10-NEXT: global_load_dword v1, v0, s[2:3] 2751; GFX10-NEXT: s_waitcnt vmcnt(0) 2752; GFX10-NEXT: v_pk_sub_i16 v1, v1, 0x44000000 2753; GFX10-NEXT: global_store_dword v0, v1, s[0:1] 2754; GFX10-NEXT: s_endpgm 2755; 2756; GFX11-LABEL: v_test_v2i16_x_sub_0_neg1_0: 2757; GFX11: ; %bb.0: 2758; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 2759; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 2760; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2761; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2762; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2763; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] 2764; GFX11-NEXT: s_waitcnt vmcnt(0) 2765; GFX11-NEXT: v_pk_sub_i16 v1, v1, 0x44000000 2766; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 2767; GFX11-NEXT: s_endpgm 2768 %tid = call i32 @llvm.amdgcn.workitem.id.x() 2769 %tid.ext = sext i32 %tid to i64 2770 %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext 2771 %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext 2772 %x = load <2 x i16>, ptr addrspace(1) %gep 2773 %result = sub <2 x i16> %x, <i16 0, i16 17408> 2774 store <2 x i16> %result, ptr addrspace(1) %gep.out 2775 ret void 2776} 2777 2778; -32 isn't an inline 
immediate, but 32 is 2779define amdgpu_kernel void @v_test_v2i16_x_add_neg32_neg32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 2780; SI-SDAG-LABEL: v_test_v2i16_x_add_neg32_neg32: 2781; SI-SDAG: ; %bb.0: 2782; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 2783; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 2784; SI-SDAG-NEXT: s_mov_b32 s6, 0 2785; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2786; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 2787; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2788; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 2789; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 2790; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 2791; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 2792; SI-SDAG-NEXT: v_subrev_i32_e32 v3, vcc, 32, v2 2793; SI-SDAG-NEXT: s_mov_b32 s4, 0xffff0000 2794; SI-SDAG-NEXT: v_bfi_b32 v2, s4, v2, v3 2795; SI-SDAG-NEXT: v_add_i32_e32 v2, vcc, 0xffe00000, v2 2796; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 2797; SI-SDAG-NEXT: s_endpgm 2798; 2799; SI-GISEL-LABEL: v_test_v2i16_x_add_neg32_neg32: 2800; SI-GISEL: ; %bb.0: 2801; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 2802; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2803; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 2804; SI-GISEL-NEXT: s_mov_b32 s6, 0 2805; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 2806; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2807; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 2808; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 2809; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 2810; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2 2811; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 0xffffffe0, v2 2812; SI-GISEL-NEXT: v_add_i32_e32 v3, vcc, 0xffffffe0, v3 2813; SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 2814; SI-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 2815; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 2816; SI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 2817; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 2818; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 2819; SI-GISEL-NEXT: s_endpgm 2820; 2821; 
VI-SDAG-LABEL: v_test_v2i16_x_add_neg32_neg32: 2822; VI-SDAG: ; %bb.0: 2823; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2824; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 2825; VI-SDAG-NEXT: v_mov_b32_e32 v4, 32 2826; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2827; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 2828; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 2829; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2830; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] 2831; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 2832; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 2833; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2834; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 2835; VI-SDAG-NEXT: v_sub_u16_sdwa v2, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2836; VI-SDAG-NEXT: v_subrev_u16_e32 v3, 32, v3 2837; VI-SDAG-NEXT: v_or_b32_e32 v2, v3, v2 2838; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 2839; VI-SDAG-NEXT: s_endpgm 2840; 2841; VI-GISEL-LABEL: v_test_v2i16_x_add_neg32_neg32: 2842; VI-GISEL: ; %bb.0: 2843; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2844; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 2845; VI-GISEL-NEXT: v_not_b32_e32 v4, 31 2846; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2847; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 2848; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 2849; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 2850; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2851; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] 2852; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 2853; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 2854; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 2855; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2856; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 2857; VI-GISEL-NEXT: v_add_u16_e32 v2, 0xffe0, v3 2858; VI-GISEL-NEXT: v_add_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2859; VI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 2860; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 2861; VI-GISEL-NEXT: s_endpgm 2862; 2863; GFX9-LABEL: 
v_test_v2i16_x_add_neg32_neg32: 2864; GFX9: ; %bb.0: 2865; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2866; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2867; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2868; GFX9-NEXT: global_load_dword v1, v0, s[2:3] 2869; GFX9-NEXT: s_waitcnt vmcnt(0) 2870; GFX9-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel_hi:[1,0] 2871; GFX9-NEXT: global_store_dword v0, v1, s[0:1] 2872; GFX9-NEXT: s_endpgm 2873; 2874; GFX10-LABEL: v_test_v2i16_x_add_neg32_neg32: 2875; GFX10: ; %bb.0: 2876; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2877; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2878; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2879; GFX10-NEXT: global_load_dword v1, v0, s[2:3] 2880; GFX10-NEXT: s_waitcnt vmcnt(0) 2881; GFX10-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel_hi:[1,0] 2882; GFX10-NEXT: global_store_dword v0, v1, s[0:1] 2883; GFX10-NEXT: s_endpgm 2884; 2885; GFX11-LABEL: v_test_v2i16_x_add_neg32_neg32: 2886; GFX11: ; %bb.0: 2887; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 2888; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 2889; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2890; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2891; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2892; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] 2893; GFX11-NEXT: s_waitcnt vmcnt(0) 2894; GFX11-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel_hi:[1,0] 2895; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 2896; GFX11-NEXT: s_endpgm 2897 %tid = call i32 @llvm.amdgcn.workitem.id.x() 2898 %tid.ext = sext i32 %tid to i64 2899 %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext 2900 %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext 2901 %x = load <2 x i16>, ptr addrspace(1) %gep 2902 %result = add <2 x i16> %x, <i16 -32, i16 -32> 2903 store <2 x i16> %result, ptr addrspace(1) %gep.out 2904 ret void 2905} 2906 2907define amdgpu_kernel void @v_test_v2i16_x_add_0_neg32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 2908; SI-SDAG-LABEL: v_test_v2i16_x_add_0_neg32: 2909; SI-SDAG: ; %bb.0: 
2910; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 2911; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 2912; SI-SDAG-NEXT: s_mov_b32 s6, 0 2913; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2914; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 2915; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2916; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 2917; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 2918; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 2919; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 2920; SI-SDAG-NEXT: v_add_i32_e32 v2, vcc, 0xffe00000, v2 2921; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 2922; SI-SDAG-NEXT: s_endpgm 2923; 2924; SI-GISEL-LABEL: v_test_v2i16_x_add_0_neg32: 2925; SI-GISEL: ; %bb.0: 2926; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 2927; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2928; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 2929; SI-GISEL-NEXT: s_mov_b32 s6, 0 2930; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 2931; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2932; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 2933; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 2934; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 2935; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2 2936; SI-GISEL-NEXT: v_add_i32_e32 v3, vcc, 0xffffffe0, v3 2937; SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 2938; SI-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 2939; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 2940; SI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 2941; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 2942; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 2943; SI-GISEL-NEXT: s_endpgm 2944; 2945; VI-SDAG-LABEL: v_test_v2i16_x_add_0_neg32: 2946; VI-SDAG: ; %bb.0: 2947; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2948; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 2949; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2950; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 2951; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 2952; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2953; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] 2954; VI-SDAG-NEXT: 
v_add_u32_e32 v0, vcc, s0, v2 2955; VI-SDAG-NEXT: v_mov_b32_e32 v2, 32 2956; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 2957; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2958; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 2959; VI-SDAG-NEXT: v_sub_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2960; VI-SDAG-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 2961; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 2962; VI-SDAG-NEXT: s_endpgm 2963; 2964; VI-GISEL-LABEL: v_test_v2i16_x_add_0_neg32: 2965; VI-GISEL: ; %bb.0: 2966; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2967; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 2968; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2969; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 2970; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 2971; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 2972; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2973; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] 2974; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 2975; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 2976; VI-GISEL-NEXT: v_not_b32_e32 v2, 31 2977; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 2978; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2979; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 2980; VI-GISEL-NEXT: v_add_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 2981; VI-GISEL-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 2982; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 2983; VI-GISEL-NEXT: s_endpgm 2984; 2985; GFX9-LABEL: v_test_v2i16_x_add_0_neg32: 2986; GFX9: ; %bb.0: 2987; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2988; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2989; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2990; GFX9-NEXT: global_load_dword v1, v0, s[2:3] 2991; GFX9-NEXT: s_waitcnt vmcnt(0) 2992; GFX9-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0] 2993; GFX9-NEXT: global_store_dword v0, v1, s[0:1] 2994; GFX9-NEXT: s_endpgm 
2995; 2996; GFX10-LABEL: v_test_v2i16_x_add_0_neg32: 2997; GFX10: ; %bb.0: 2998; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2999; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3000; GFX10-NEXT: s_waitcnt lgkmcnt(0) 3001; GFX10-NEXT: global_load_dword v1, v0, s[2:3] 3002; GFX10-NEXT: s_waitcnt vmcnt(0) 3003; GFX10-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0] 3004; GFX10-NEXT: global_store_dword v0, v1, s[0:1] 3005; GFX10-NEXT: s_endpgm 3006; 3007; GFX11-LABEL: v_test_v2i16_x_add_0_neg32: 3008; GFX11: ; %bb.0: 3009; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 3010; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 3011; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3012; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3013; GFX11-NEXT: s_waitcnt lgkmcnt(0) 3014; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] 3015; GFX11-NEXT: s_waitcnt vmcnt(0) 3016; GFX11-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0] 3017; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 3018; GFX11-NEXT: s_endpgm 3019 %tid = call i32 @llvm.amdgcn.workitem.id.x() 3020 %tid.ext = sext i32 %tid to i64 3021 %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext 3022 %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext 3023 %x = load <2 x i16>, ptr addrspace(1) %gep 3024 %result = add <2 x i16> %x, <i16 0, i16 -32> 3025 store <2 x i16> %result, ptr addrspace(1) %gep.out 3026 ret void 3027} 3028 3029define amdgpu_kernel void @v_test_v2i16_x_add_neg32_0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 3030; SI-SDAG-LABEL: v_test_v2i16_x_add_neg32_0: 3031; SI-SDAG: ; %bb.0: 3032; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 3033; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 3034; SI-SDAG-NEXT: s_mov_b32 s6, 0 3035; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3036; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 3037; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 3038; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 3039; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 3040; SI-SDAG-NEXT: 
s_mov_b64 s[2:3], s[6:7] 3041; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 3042; SI-SDAG-NEXT: v_subrev_i32_e32 v3, vcc, 32, v2 3043; SI-SDAG-NEXT: s_mov_b32 s4, 0xffff 3044; SI-SDAG-NEXT: v_bfi_b32 v2, s4, v3, v2 3045; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 3046; SI-SDAG-NEXT: s_endpgm 3047; 3048; SI-GISEL-LABEL: v_test_v2i16_x_add_neg32_0: 3049; SI-GISEL: ; %bb.0: 3050; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 3051; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3052; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 3053; SI-GISEL-NEXT: s_mov_b32 s6, 0 3054; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 3055; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 3056; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 3057; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 3058; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 3059; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2 3060; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 0xffffffe0, v2 3061; SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 3062; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 3063; SI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 3064; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 3065; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 3066; SI-GISEL-NEXT: s_endpgm 3067; 3068; VI-SDAG-LABEL: v_test_v2i16_x_add_neg32_0: 3069; VI-SDAG: ; %bb.0: 3070; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3071; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 3072; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 3073; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 3074; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 3075; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3076; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] 3077; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 3078; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 3079; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3080; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 3081; VI-SDAG-NEXT: v_and_b32_e32 v2, 0xffff0000, v3 3082; VI-SDAG-NEXT: v_subrev_u16_e32 v3, 32, v3 3083; VI-SDAG-NEXT: v_or_b32_e32 v2, v3, v2 3084; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 
3085; VI-SDAG-NEXT: s_endpgm 3086; 3087; VI-GISEL-LABEL: v_test_v2i16_x_add_neg32_0: 3088; VI-GISEL: ; %bb.0: 3089; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3090; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 3091; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 3092; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 3093; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 3094; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 3095; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3096; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] 3097; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 3098; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 3099; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 3100; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3101; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 3102; VI-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v3 3103; VI-GISEL-NEXT: v_add_u16_e32 v3, 0xffe0, v3 3104; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2 3105; VI-GISEL-NEXT: v_or_b32_e32 v2, v3, v2 3106; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 3107; VI-GISEL-NEXT: s_endpgm 3108; 3109; GFX9-LABEL: v_test_v2i16_x_add_neg32_0: 3110; GFX9: ; %bb.0: 3111; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3112; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3113; GFX9-NEXT: s_waitcnt lgkmcnt(0) 3114; GFX9-NEXT: global_load_dword v1, v0, s[2:3] 3115; GFX9-NEXT: s_waitcnt vmcnt(0) 3116; GFX9-NEXT: v_pk_sub_u16 v1, v1, 32 3117; GFX9-NEXT: global_store_dword v0, v1, s[0:1] 3118; GFX9-NEXT: s_endpgm 3119; 3120; GFX10-LABEL: v_test_v2i16_x_add_neg32_0: 3121; GFX10: ; %bb.0: 3122; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3123; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3124; GFX10-NEXT: s_waitcnt lgkmcnt(0) 3125; GFX10-NEXT: global_load_dword v1, v0, s[2:3] 3126; GFX10-NEXT: s_waitcnt vmcnt(0) 3127; GFX10-NEXT: v_pk_sub_u16 v1, v1, 32 3128; GFX10-NEXT: global_store_dword v0, v1, s[0:1] 3129; GFX10-NEXT: s_endpgm 3130; 3131; GFX11-LABEL: v_test_v2i16_x_add_neg32_0: 3132; GFX11: ; %bb.0: 3133; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 3134; GFX11-NEXT: v_and_b32_e32 v0, 
0x3ff, v0 3135; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3136; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3137; GFX11-NEXT: s_waitcnt lgkmcnt(0) 3138; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] 3139; GFX11-NEXT: s_waitcnt vmcnt(0) 3140; GFX11-NEXT: v_pk_sub_u16 v1, v1, 32 3141; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 3142; GFX11-NEXT: s_endpgm 3143 %tid = call i32 @llvm.amdgcn.workitem.id.x() 3144 %tid.ext = sext i32 %tid to i64 3145 %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext 3146 %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext 3147 %x = load <2 x i16>, ptr addrspace(1) %gep 3148 %result = add <2 x i16> %x, <i16 -32, i16 0> 3149 store <2 x i16> %result, ptr addrspace(1) %gep.out 3150 ret void 3151} 3152 3153; 16 and -16 are both inline immediates 3154define amdgpu_kernel void @v_test_v2i16_x_add_neg16_neg16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 3155; SI-SDAG-LABEL: v_test_v2i16_x_add_neg16_neg16: 3156; SI-SDAG: ; %bb.0: 3157; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 3158; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 3159; SI-SDAG-NEXT: s_mov_b32 s6, 0 3160; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3161; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 3162; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 3163; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 3164; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 3165; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 3166; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 3167; SI-SDAG-NEXT: v_add_i32_e32 v3, vcc, -16, v2 3168; SI-SDAG-NEXT: s_mov_b32 s4, 0xffff0000 3169; SI-SDAG-NEXT: v_bfi_b32 v2, s4, v2, v3 3170; SI-SDAG-NEXT: v_add_i32_e32 v2, vcc, 0xfff00000, v2 3171; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 3172; SI-SDAG-NEXT: s_endpgm 3173; 3174; SI-GISEL-LABEL: v_test_v2i16_x_add_neg16_neg16: 3175; SI-GISEL: ; %bb.0: 3176; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 3177; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3178; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 
3179; SI-GISEL-NEXT: s_mov_b32 s6, 0 3180; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 3181; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 3182; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 3183; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 3184; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 3185; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2 3186; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, -16, v2 3187; SI-GISEL-NEXT: v_add_i32_e32 v3, vcc, -16, v3 3188; SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 3189; SI-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 3190; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 3191; SI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 3192; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 3193; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 3194; SI-GISEL-NEXT: s_endpgm 3195; 3196; VI-SDAG-LABEL: v_test_v2i16_x_add_neg16_neg16: 3197; VI-SDAG: ; %bb.0: 3198; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3199; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 3200; VI-SDAG-NEXT: v_mov_b32_e32 v4, -16 3201; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 3202; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 3203; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 3204; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3205; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] 3206; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 3207; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 3208; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3209; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 3210; VI-SDAG-NEXT: v_add_u16_e32 v2, -16, v3 3211; VI-SDAG-NEXT: v_add_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 3212; VI-SDAG-NEXT: v_or_b32_e32 v2, v2, v3 3213; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 3214; VI-SDAG-NEXT: s_endpgm 3215; 3216; VI-GISEL-LABEL: v_test_v2i16_x_add_neg16_neg16: 3217; VI-GISEL: ; %bb.0: 3218; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3219; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 3220; VI-GISEL-NEXT: v_mov_b32_e32 v4, -16 3221; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 3222; VI-GISEL-NEXT: 
v_mov_b32_e32 v0, s2 3223; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 3224; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 3225; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3226; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] 3227; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 3228; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 3229; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 3230; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3231; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 3232; VI-GISEL-NEXT: v_add_u16_e32 v2, -16, v3 3233; VI-GISEL-NEXT: v_add_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 3234; VI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 3235; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 3236; VI-GISEL-NEXT: s_endpgm 3237; 3238; GFX9-LABEL: v_test_v2i16_x_add_neg16_neg16: 3239; GFX9: ; %bb.0: 3240; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3241; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3242; GFX9-NEXT: s_waitcnt lgkmcnt(0) 3243; GFX9-NEXT: global_load_dword v1, v0, s[2:3] 3244; GFX9-NEXT: s_waitcnt vmcnt(0) 3245; GFX9-NEXT: v_pk_add_u16 v1, v1, -16 op_sel_hi:[1,0] 3246; GFX9-NEXT: global_store_dword v0, v1, s[0:1] 3247; GFX9-NEXT: s_endpgm 3248; 3249; GFX10-LABEL: v_test_v2i16_x_add_neg16_neg16: 3250; GFX10: ; %bb.0: 3251; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3252; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3253; GFX10-NEXT: s_waitcnt lgkmcnt(0) 3254; GFX10-NEXT: global_load_dword v1, v0, s[2:3] 3255; GFX10-NEXT: s_waitcnt vmcnt(0) 3256; GFX10-NEXT: v_pk_add_u16 v1, v1, -16 op_sel_hi:[1,0] 3257; GFX10-NEXT: global_store_dword v0, v1, s[0:1] 3258; GFX10-NEXT: s_endpgm 3259; 3260; GFX11-LABEL: v_test_v2i16_x_add_neg16_neg16: 3261; GFX11: ; %bb.0: 3262; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 3263; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 3264; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3265; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3266; GFX11-NEXT: s_waitcnt lgkmcnt(0) 3267; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] 3268; GFX11-NEXT: 
s_waitcnt vmcnt(0) 3269; GFX11-NEXT: v_pk_add_u16 v1, v1, -16 op_sel_hi:[1,0] 3270; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 3271; GFX11-NEXT: s_endpgm 3272 %tid = call i32 @llvm.amdgcn.workitem.id.x() 3273 %tid.ext = sext i32 %tid to i64 3274 %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext 3275 %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext 3276 %x = load <2 x i16>, ptr addrspace(1) %gep 3277 %result = add <2 x i16> %x, <i16 -16, i16 -16> 3278 store <2 x i16> %result, ptr addrspace(1) %gep.out 3279 ret void 3280} 3281 3282define amdgpu_kernel void @v_test_v2i16_x_add_0_neg16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 3283; SI-SDAG-LABEL: v_test_v2i16_x_add_0_neg16: 3284; SI-SDAG: ; %bb.0: 3285; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 3286; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 3287; SI-SDAG-NEXT: s_mov_b32 s6, 0 3288; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3289; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 3290; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 3291; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 3292; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 3293; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 3294; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 3295; SI-SDAG-NEXT: v_add_i32_e32 v2, vcc, 0xfff00000, v2 3296; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 3297; SI-SDAG-NEXT: s_endpgm 3298; 3299; SI-GISEL-LABEL: v_test_v2i16_x_add_0_neg16: 3300; SI-GISEL: ; %bb.0: 3301; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 3302; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3303; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 3304; SI-GISEL-NEXT: s_mov_b32 s6, 0 3305; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 3306; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 3307; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 3308; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 3309; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 3310; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2 3311; SI-GISEL-NEXT: v_add_i32_e32 v3, vcc, -16, v3 3312; 
SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 3313; SI-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 3314; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 3315; SI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 3316; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 3317; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 3318; SI-GISEL-NEXT: s_endpgm 3319; 3320; VI-SDAG-LABEL: v_test_v2i16_x_add_0_neg16: 3321; VI-SDAG: ; %bb.0: 3322; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3323; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 3324; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 3325; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 3326; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 3327; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3328; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] 3329; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 3330; VI-SDAG-NEXT: v_mov_b32_e32 v2, -16 3331; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 3332; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3333; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 3334; VI-SDAG-NEXT: v_add_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 3335; VI-SDAG-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 3336; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 3337; VI-SDAG-NEXT: s_endpgm 3338; 3339; VI-GISEL-LABEL: v_test_v2i16_x_add_0_neg16: 3340; VI-GISEL: ; %bb.0: 3341; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3342; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 3343; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 3344; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 3345; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 3346; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 3347; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3348; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] 3349; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 3350; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 3351; VI-GISEL-NEXT: v_mov_b32_e32 v2, -16 3352; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 3353; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3354; 
VI-GISEL-NEXT: s_waitcnt vmcnt(0) 3355; VI-GISEL-NEXT: v_add_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 3356; VI-GISEL-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 3357; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 3358; VI-GISEL-NEXT: s_endpgm 3359; 3360; GFX9-LABEL: v_test_v2i16_x_add_0_neg16: 3361; GFX9: ; %bb.0: 3362; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3363; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3364; GFX9-NEXT: s_waitcnt lgkmcnt(0) 3365; GFX9-NEXT: global_load_dword v1, v0, s[2:3] 3366; GFX9-NEXT: s_waitcnt vmcnt(0) 3367; GFX9-NEXT: v_pk_sub_u16 v1, v1, 16 op_sel:[0,1] op_sel_hi:[1,0] 3368; GFX9-NEXT: global_store_dword v0, v1, s[0:1] 3369; GFX9-NEXT: s_endpgm 3370; 3371; GFX10-LABEL: v_test_v2i16_x_add_0_neg16: 3372; GFX10: ; %bb.0: 3373; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3374; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3375; GFX10-NEXT: s_waitcnt lgkmcnt(0) 3376; GFX10-NEXT: global_load_dword v1, v0, s[2:3] 3377; GFX10-NEXT: s_waitcnt vmcnt(0) 3378; GFX10-NEXT: v_pk_sub_u16 v1, v1, 16 op_sel:[0,1] op_sel_hi:[1,0] 3379; GFX10-NEXT: global_store_dword v0, v1, s[0:1] 3380; GFX10-NEXT: s_endpgm 3381; 3382; GFX11-LABEL: v_test_v2i16_x_add_0_neg16: 3383; GFX11: ; %bb.0: 3384; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 3385; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 3386; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3387; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3388; GFX11-NEXT: s_waitcnt lgkmcnt(0) 3389; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] 3390; GFX11-NEXT: s_waitcnt vmcnt(0) 3391; GFX11-NEXT: v_pk_sub_u16 v1, v1, 16 op_sel:[0,1] op_sel_hi:[1,0] 3392; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 3393; GFX11-NEXT: s_endpgm 3394 %tid = call i32 @llvm.amdgcn.workitem.id.x() 3395 %tid.ext = sext i32 %tid to i64 3396 %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext 3397 %gep.out = getelementptr inbounds <2 x i16>, ptr 
addrspace(1) %out, i64 %tid.ext 3398 %x = load <2 x i16>, ptr addrspace(1) %gep 3399 %result = add <2 x i16> %x, <i16 0, i16 -16> 3400 store <2 x i16> %result, ptr addrspace(1) %gep.out 3401 ret void 3402} 3403 3404define amdgpu_kernel void @v_test_v2i16_x_add_neg16_0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 3405; SI-SDAG-LABEL: v_test_v2i16_x_add_neg16_0: 3406; SI-SDAG: ; %bb.0: 3407; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 3408; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 3409; SI-SDAG-NEXT: s_mov_b32 s6, 0 3410; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3411; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 3412; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 3413; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 3414; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 3415; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 3416; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 3417; SI-SDAG-NEXT: v_add_i32_e32 v3, vcc, -16, v2 3418; SI-SDAG-NEXT: s_mov_b32 s4, 0xffff 3419; SI-SDAG-NEXT: v_bfi_b32 v2, s4, v3, v2 3420; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 3421; SI-SDAG-NEXT: s_endpgm 3422; 3423; SI-GISEL-LABEL: v_test_v2i16_x_add_neg16_0: 3424; SI-GISEL: ; %bb.0: 3425; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 3426; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3427; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 3428; SI-GISEL-NEXT: s_mov_b32 s6, 0 3429; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 3430; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 3431; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 3432; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 3433; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 3434; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2 3435; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, -16, v2 3436; SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 3437; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 3438; SI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 3439; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 3440; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 3441; SI-GISEL-NEXT: s_endpgm 3442; 3443; 
VI-SDAG-LABEL: v_test_v2i16_x_add_neg16_0: 3444; VI-SDAG: ; %bb.0: 3445; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3446; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 3447; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 3448; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 3449; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 3450; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3451; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] 3452; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 3453; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 3454; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3455; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 3456; VI-SDAG-NEXT: v_and_b32_e32 v2, 0xffff0000, v3 3457; VI-SDAG-NEXT: v_add_u16_e32 v3, -16, v3 3458; VI-SDAG-NEXT: v_or_b32_e32 v2, v3, v2 3459; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 3460; VI-SDAG-NEXT: s_endpgm 3461; 3462; VI-GISEL-LABEL: v_test_v2i16_x_add_neg16_0: 3463; VI-GISEL: ; %bb.0: 3464; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3465; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 3466; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 3467; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 3468; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 3469; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 3470; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3471; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] 3472; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 3473; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 3474; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 3475; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3476; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 3477; VI-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v3 3478; VI-GISEL-NEXT: v_add_u16_e32 v3, -16, v3 3479; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2 3480; VI-GISEL-NEXT: v_or_b32_e32 v2, v3, v2 3481; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 3482; VI-GISEL-NEXT: s_endpgm 3483; 3484; GFX9-LABEL: v_test_v2i16_x_add_neg16_0: 3485; GFX9: ; %bb.0: 3486; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3487; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3488; GFX9-NEXT: s_waitcnt lgkmcnt(0) 3489; 
GFX9-NEXT: global_load_dword v1, v0, s[2:3] 3490; GFX9-NEXT: s_waitcnt vmcnt(0) 3491; GFX9-NEXT: v_pk_sub_u16 v1, v1, 16 3492; GFX9-NEXT: global_store_dword v0, v1, s[0:1] 3493; GFX9-NEXT: s_endpgm 3494; 3495; GFX10-LABEL: v_test_v2i16_x_add_neg16_0: 3496; GFX10: ; %bb.0: 3497; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3498; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3499; GFX10-NEXT: s_waitcnt lgkmcnt(0) 3500; GFX10-NEXT: global_load_dword v1, v0, s[2:3] 3501; GFX10-NEXT: s_waitcnt vmcnt(0) 3502; GFX10-NEXT: v_pk_sub_u16 v1, v1, 16 3503; GFX10-NEXT: global_store_dword v0, v1, s[0:1] 3504; GFX10-NEXT: s_endpgm 3505; 3506; GFX11-LABEL: v_test_v2i16_x_add_neg16_0: 3507; GFX11: ; %bb.0: 3508; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 3509; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 3510; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3511; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3512; GFX11-NEXT: s_waitcnt lgkmcnt(0) 3513; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] 3514; GFX11-NEXT: s_waitcnt vmcnt(0) 3515; GFX11-NEXT: v_pk_sub_u16 v1, v1, 16 3516; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 3517; GFX11-NEXT: s_endpgm 3518 %tid = call i32 @llvm.amdgcn.workitem.id.x() 3519 %tid.ext = sext i32 %tid to i64 3520 %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext 3521 %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext 3522 %x = load <2 x i16>, ptr addrspace(1) %gep 3523 %result = add <2 x i16> %x, <i16 -16, i16 0> 3524 store <2 x i16> %result, ptr addrspace(1) %gep.out 3525 ret void 3526} 3527 3528define amdgpu_kernel void @v_test_v2i16_x_add_neg_fpone(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 3529; SI-SDAG-LABEL: v_test_v2i16_x_add_neg_fpone: 3530; SI-SDAG: ; %bb.0: 3531; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 3532; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 3533; SI-SDAG-NEXT: s_mov_b32 s6, 0 3534; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3535; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 3536; SI-SDAG-NEXT: 
s_waitcnt lgkmcnt(0) 3537; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 3538; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 3539; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 3540; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 3541; SI-SDAG-NEXT: v_add_i32_e32 v3, vcc, 0xffffc400, v2 3542; SI-SDAG-NEXT: s_mov_b32 s4, 0xffff0000 3543; SI-SDAG-NEXT: v_bfi_b32 v2, s4, v2, v3 3544; SI-SDAG-NEXT: v_add_i32_e32 v2, vcc, 0xc4000000, v2 3545; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 3546; SI-SDAG-NEXT: s_endpgm 3547; 3548; SI-GISEL-LABEL: v_test_v2i16_x_add_neg_fpone: 3549; SI-GISEL: ; %bb.0: 3550; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 3551; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3552; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 3553; SI-GISEL-NEXT: s_mov_b32 s6, 0 3554; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 3555; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 3556; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 3557; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 3558; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 3559; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2 3560; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 0xffffc400, v2 3561; SI-GISEL-NEXT: v_add_i32_e32 v3, vcc, 0xffffc400, v3 3562; SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 3563; SI-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 3564; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 3565; SI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 3566; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 3567; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 3568; SI-GISEL-NEXT: s_endpgm 3569; 3570; VI-SDAG-LABEL: v_test_v2i16_x_add_neg_fpone: 3571; VI-SDAG: ; %bb.0: 3572; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3573; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 3574; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffc400 3575; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 3576; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 3577; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 3578; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3579; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] 
3580; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 3581; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 3582; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3583; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 3584; VI-SDAG-NEXT: v_add_u16_e32 v2, 0xc400, v3 3585; VI-SDAG-NEXT: v_add_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 3586; VI-SDAG-NEXT: v_or_b32_e32 v2, v2, v3 3587; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 3588; VI-SDAG-NEXT: s_endpgm 3589; 3590; VI-GISEL-LABEL: v_test_v2i16_x_add_neg_fpone: 3591; VI-GISEL: ; %bb.0: 3592; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3593; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 3594; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffc400 3595; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 3596; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 3597; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 3598; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 3599; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3600; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] 3601; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 3602; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 3603; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 3604; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3605; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 3606; VI-GISEL-NEXT: v_add_u16_e32 v2, 0xc400, v3 3607; VI-GISEL-NEXT: v_add_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 3608; VI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 3609; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 3610; VI-GISEL-NEXT: s_endpgm 3611; 3612; GFX9-SDAG-LABEL: v_test_v2i16_x_add_neg_fpone: 3613; GFX9-SDAG: ; %bb.0: 3614; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3615; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3616; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 3617; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3] 3618; GFX9-SDAG-NEXT: s_movk_i32 s2, 0xc400 3619; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 3620; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v1, s2 op_sel_hi:[1,0] 3621; GFX9-SDAG-NEXT: global_store_dword v0, v1, 
s[0:1] 3622; GFX9-SDAG-NEXT: s_endpgm 3623; 3624; GFX9-GISEL-LABEL: v_test_v2i16_x_add_neg_fpone: 3625; GFX9-GISEL: ; %bb.0: 3626; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3627; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3628; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xc400c400 3629; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 3630; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3] 3631; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 3632; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v1, v2 3633; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1] 3634; GFX9-GISEL-NEXT: s_endpgm 3635; 3636; GFX10-SDAG-LABEL: v_test_v2i16_x_add_neg_fpone: 3637; GFX10-SDAG: ; %bb.0: 3638; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3639; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3640; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) 3641; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3] 3642; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 3643; GFX10-SDAG-NEXT: v_pk_add_u16 v1, 0xffffc400, v1 op_sel_hi:[0,1] 3644; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1] 3645; GFX10-SDAG-NEXT: s_endpgm 3646; 3647; GFX10-GISEL-LABEL: v_test_v2i16_x_add_neg_fpone: 3648; GFX10-GISEL: ; %bb.0: 3649; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3650; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3651; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) 3652; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3] 3653; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) 3654; GFX10-GISEL-NEXT: v_pk_add_u16 v1, 0xc400c400, v1 3655; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1] 3656; GFX10-GISEL-NEXT: s_endpgm 3657; 3658; GFX11-SDAG-LABEL: v_test_v2i16_x_add_neg_fpone: 3659; GFX11-SDAG: ; %bb.0: 3660; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 3661; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 3662; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 3663; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3664; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 3665; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] 3666; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 
3667; GFX11-SDAG-NEXT: v_pk_add_u16 v1, 0xffffc400, v1 op_sel_hi:[0,1] 3668; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] 3669; GFX11-SDAG-NEXT: s_endpgm 3670; 3671; GFX11-GISEL-LABEL: v_test_v2i16_x_add_neg_fpone: 3672; GFX11-GISEL: ; %bb.0: 3673; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 3674; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 3675; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 3676; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3677; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 3678; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3] 3679; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 3680; GFX11-GISEL-NEXT: v_pk_add_u16 v1, 0xc400c400, v1 3681; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] 3682; GFX11-GISEL-NEXT: s_endpgm 3683 %tid = call i32 @llvm.amdgcn.workitem.id.x() 3684 %tid.ext = sext i32 %tid to i64 3685 %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext 3686 %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext 3687 %x = load <2 x i16>, ptr addrspace(1) %gep 3688 %result = add <2 x i16> %x, <i16 -15360, i16 -15360> 3689 store <2 x i16> %result, ptr addrspace(1) %gep.out 3690 ret void 3691} 3692 3693define amdgpu_kernel void @v_test_v2i16_x_add_neg_negfpone(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 3694; SI-SDAG-LABEL: v_test_v2i16_x_add_neg_negfpone: 3695; SI-SDAG: ; %bb.0: 3696; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 3697; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 3698; SI-SDAG-NEXT: s_mov_b32 s6, 0 3699; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3700; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 3701; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 3702; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 3703; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 3704; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 3705; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 3706; SI-SDAG-NEXT: v_add_i32_e32 v3, vcc, 0x4400, v2 3707; SI-SDAG-NEXT: s_mov_b32 s4, 0xffff0000 3708; SI-SDAG-NEXT: v_bfi_b32 v2, s4, v2, v3 3709; 
SI-SDAG-NEXT: v_add_i32_e32 v2, vcc, 0x44000000, v2 3710; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 3711; SI-SDAG-NEXT: s_endpgm 3712; 3713; SI-GISEL-LABEL: v_test_v2i16_x_add_neg_negfpone: 3714; SI-GISEL: ; %bb.0: 3715; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 3716; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3717; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 3718; SI-GISEL-NEXT: s_mov_b32 s6, 0 3719; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 3720; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 3721; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 3722; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 3723; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 3724; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2 3725; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 0x4400, v2 3726; SI-GISEL-NEXT: v_add_i32_e32 v3, vcc, 0x4400, v3 3727; SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 3728; SI-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 3729; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 3730; SI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 3731; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 3732; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 3733; SI-GISEL-NEXT: s_endpgm 3734; 3735; VI-SDAG-LABEL: v_test_v2i16_x_add_neg_negfpone: 3736; VI-SDAG: ; %bb.0: 3737; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3738; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 3739; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x4400 3740; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 3741; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 3742; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 3743; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3744; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] 3745; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 3746; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 3747; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3748; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 3749; VI-SDAG-NEXT: v_add_u16_e32 v2, 0x4400, v3 3750; VI-SDAG-NEXT: v_add_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 3751; VI-SDAG-NEXT: 
v_or_b32_e32 v2, v2, v3 3752; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 3753; VI-SDAG-NEXT: s_endpgm 3754; 3755; VI-GISEL-LABEL: v_test_v2i16_x_add_neg_negfpone: 3756; VI-GISEL: ; %bb.0: 3757; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3758; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 3759; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x4400 3760; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 3761; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 3762; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 3763; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 3764; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3765; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] 3766; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 3767; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 3768; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 3769; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3770; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 3771; VI-GISEL-NEXT: v_add_u16_e32 v2, 0x4400, v3 3772; VI-GISEL-NEXT: v_add_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 3773; VI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 3774; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 3775; VI-GISEL-NEXT: s_endpgm 3776; 3777; GFX9-SDAG-LABEL: v_test_v2i16_x_add_neg_negfpone: 3778; GFX9-SDAG: ; %bb.0: 3779; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3780; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3781; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 3782; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3] 3783; GFX9-SDAG-NEXT: s_movk_i32 s2, 0x4400 3784; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 3785; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v1, s2 op_sel_hi:[1,0] 3786; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1] 3787; GFX9-SDAG-NEXT: s_endpgm 3788; 3789; GFX9-GISEL-LABEL: v_test_v2i16_x_add_neg_negfpone: 3790; GFX9-GISEL: ; %bb.0: 3791; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3792; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3793; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x44004400 3794; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 3795; GFX9-GISEL-NEXT: 
global_load_dword v1, v0, s[2:3] 3796; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 3797; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v1, v2 3798; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1] 3799; GFX9-GISEL-NEXT: s_endpgm 3800; 3801; GFX10-SDAG-LABEL: v_test_v2i16_x_add_neg_negfpone: 3802; GFX10-SDAG: ; %bb.0: 3803; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3804; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3805; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) 3806; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3] 3807; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 3808; GFX10-SDAG-NEXT: v_pk_add_u16 v1, 0x4400, v1 op_sel_hi:[0,1] 3809; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1] 3810; GFX10-SDAG-NEXT: s_endpgm 3811; 3812; GFX10-GISEL-LABEL: v_test_v2i16_x_add_neg_negfpone: 3813; GFX10-GISEL: ; %bb.0: 3814; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3815; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3816; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) 3817; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3] 3818; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) 3819; GFX10-GISEL-NEXT: v_pk_add_u16 v1, 0x44004400, v1 3820; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1] 3821; GFX10-GISEL-NEXT: s_endpgm 3822; 3823; GFX11-SDAG-LABEL: v_test_v2i16_x_add_neg_negfpone: 3824; GFX11-SDAG: ; %bb.0: 3825; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 3826; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 3827; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 3828; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3829; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 3830; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] 3831; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 3832; GFX11-SDAG-NEXT: v_pk_add_u16 v1, 0x4400, v1 op_sel_hi:[0,1] 3833; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] 3834; GFX11-SDAG-NEXT: s_endpgm 3835; 3836; GFX11-GISEL-LABEL: v_test_v2i16_x_add_neg_negfpone: 3837; GFX11-GISEL: ; %bb.0: 3838; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 3839; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, 
v0 3840; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 3841; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3842; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 3843; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3] 3844; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 3845; GFX11-GISEL-NEXT: v_pk_add_u16 v1, 0x44004400, v1 3846; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] 3847; GFX11-GISEL-NEXT: s_endpgm 3848 %tid = call i32 @llvm.amdgcn.workitem.id.x() 3849 %tid.ext = sext i32 %tid to i64 3850 %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext 3851 %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext 3852 %x = load <2 x i16>, ptr addrspace(1) %gep 3853 %result = add <2 x i16> %x, <i16 17408, i16 17408> 3854 store <2 x i16> %result, ptr addrspace(1) %gep.out 3855 ret void 3856} 3857 3858define amdgpu_kernel void @v_test_v2i16_x_add_neg_fptwo(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 3859; SI-SDAG-LABEL: v_test_v2i16_x_add_neg_fptwo: 3860; SI-SDAG: ; %bb.0: 3861; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 3862; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 3863; SI-SDAG-NEXT: s_mov_b32 s6, 0 3864; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3865; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 3866; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 3867; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 3868; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 3869; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 3870; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 3871; SI-SDAG-NEXT: v_add_i32_e32 v3, vcc, 0x4000, v2 3872; SI-SDAG-NEXT: s_mov_b32 s4, 0xffff0000 3873; SI-SDAG-NEXT: v_bfi_b32 v2, s4, v2, v3 3874; SI-SDAG-NEXT: v_add_i32_e32 v2, vcc, 2.0, v2 3875; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 3876; SI-SDAG-NEXT: s_endpgm 3877; 3878; SI-GISEL-LABEL: v_test_v2i16_x_add_neg_fptwo: 3879; SI-GISEL: ; %bb.0: 3880; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 3881; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3882; SI-GISEL-NEXT: v_mov_b32_e32 v1, 
0 3883; SI-GISEL-NEXT: s_mov_b32 s6, 0 3884; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 3885; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 3886; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 3887; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 3888; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 3889; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2 3890; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 0x4000, v2 3891; SI-GISEL-NEXT: v_add_i32_e32 v3, vcc, 0x4000, v3 3892; SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 3893; SI-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 3894; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 3895; SI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 3896; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 3897; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 3898; SI-GISEL-NEXT: s_endpgm 3899; 3900; VI-SDAG-LABEL: v_test_v2i16_x_add_neg_fptwo: 3901; VI-SDAG: ; %bb.0: 3902; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3903; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 3904; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x4000 3905; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 3906; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 3907; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 3908; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3909; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] 3910; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 3911; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 3912; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3913; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 3914; VI-SDAG-NEXT: v_add_u16_e32 v2, 0x4000, v3 3915; VI-SDAG-NEXT: v_add_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 3916; VI-SDAG-NEXT: v_or_b32_e32 v2, v2, v3 3917; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 3918; VI-SDAG-NEXT: s_endpgm 3919; 3920; VI-GISEL-LABEL: v_test_v2i16_x_add_neg_fptwo: 3921; VI-GISEL: ; %bb.0: 3922; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3923; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 3924; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x4000 3925; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 3926; 
VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 3927; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 3928; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 3929; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3930; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] 3931; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 3932; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 3933; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 3934; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3935; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 3936; VI-GISEL-NEXT: v_add_u16_e32 v2, 0x4000, v3 3937; VI-GISEL-NEXT: v_add_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 3938; VI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 3939; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 3940; VI-GISEL-NEXT: s_endpgm 3941; 3942; GFX9-LABEL: v_test_v2i16_x_add_neg_fptwo: 3943; GFX9: ; %bb.0: 3944; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3945; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3946; GFX9-NEXT: s_waitcnt lgkmcnt(0) 3947; GFX9-NEXT: global_load_dword v1, v0, s[2:3] 3948; GFX9-NEXT: s_waitcnt vmcnt(0) 3949; GFX9-NEXT: v_pk_add_u16 v1, v1, 2.0 op_sel:[0,1] 3950; GFX9-NEXT: global_store_dword v0, v1, s[0:1] 3951; GFX9-NEXT: s_endpgm 3952; 3953; GFX10-LABEL: v_test_v2i16_x_add_neg_fptwo: 3954; GFX10: ; %bb.0: 3955; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3956; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3957; GFX10-NEXT: s_waitcnt lgkmcnt(0) 3958; GFX10-NEXT: global_load_dword v1, v0, s[2:3] 3959; GFX10-NEXT: s_waitcnt vmcnt(0) 3960; GFX10-NEXT: v_pk_add_u16 v1, v1, 2.0 op_sel:[0,1] 3961; GFX10-NEXT: global_store_dword v0, v1, s[0:1] 3962; GFX10-NEXT: s_endpgm 3963; 3964; GFX11-LABEL: v_test_v2i16_x_add_neg_fptwo: 3965; GFX11: ; %bb.0: 3966; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 3967; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 3968; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3969; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3970; GFX11-NEXT: s_waitcnt lgkmcnt(0) 3971; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] 3972; GFX11-NEXT: 
s_waitcnt vmcnt(0) 3973; GFX11-NEXT: v_pk_add_u16 v1, v1, 2.0 op_sel:[0,1] 3974; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 3975; GFX11-NEXT: s_endpgm 3976 %tid = call i32 @llvm.amdgcn.workitem.id.x() 3977 %tid.ext = sext i32 %tid to i64 3978 %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext 3979 %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext 3980 %x = load <2 x i16>, ptr addrspace(1) %gep 3981 %result = add <2 x i16> %x, <i16 16384, i16 16384> 3982 store <2 x i16> %result, ptr addrspace(1) %gep.out 3983 ret void 3984} 3985 3986define amdgpu_kernel void @v_test_v2i16_x_add_neg_negfptwo(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 3987; SI-SDAG-LABEL: v_test_v2i16_x_add_neg_negfptwo: 3988; SI-SDAG: ; %bb.0: 3989; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 3990; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 3991; SI-SDAG-NEXT: s_mov_b32 s6, 0 3992; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3993; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 3994; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 3995; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 3996; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 3997; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 3998; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 3999; SI-SDAG-NEXT: v_add_i32_e32 v3, vcc, 0xffffc000, v2 4000; SI-SDAG-NEXT: s_mov_b32 s4, 0xffff0000 4001; SI-SDAG-NEXT: v_bfi_b32 v2, s4, v2, v3 4002; SI-SDAG-NEXT: v_add_i32_e32 v2, vcc, -2.0, v2 4003; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 4004; SI-SDAG-NEXT: s_endpgm 4005; 4006; SI-GISEL-LABEL: v_test_v2i16_x_add_neg_negfptwo: 4007; SI-GISEL: ; %bb.0: 4008; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 4009; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 4010; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 4011; SI-GISEL-NEXT: s_mov_b32 s6, 0 4012; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 4013; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 4014; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 4015; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 
addr64 4016; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 4017; SI-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2 4018; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 0xffffc000, v2 4019; SI-GISEL-NEXT: v_add_i32_e32 v3, vcc, 0xffffc000, v3 4020; SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 4021; SI-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 4022; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 4023; SI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 4024; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 4025; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 4026; SI-GISEL-NEXT: s_endpgm 4027; 4028; VI-SDAG-LABEL: v_test_v2i16_x_add_neg_negfptwo: 4029; VI-SDAG: ; %bb.0: 4030; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 4031; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 4032; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0xffffc000 4033; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 4034; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 4035; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 4036; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4037; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] 4038; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 4039; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 4040; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4041; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 4042; VI-SDAG-NEXT: v_add_u16_e32 v2, 0xc000, v3 4043; VI-SDAG-NEXT: v_add_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 4044; VI-SDAG-NEXT: v_or_b32_e32 v2, v2, v3 4045; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 4046; VI-SDAG-NEXT: s_endpgm 4047; 4048; VI-GISEL-LABEL: v_test_v2i16_x_add_neg_negfptwo: 4049; VI-GISEL: ; %bb.0: 4050; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 4051; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 4052; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0xffffc000 4053; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 4054; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 4055; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 4056; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 4057; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4058; VI-GISEL-NEXT: 
flat_load_dword v3, v[0:1] 4059; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 4060; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 4061; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 4062; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4063; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 4064; VI-GISEL-NEXT: v_add_u16_e32 v2, 0xc000, v3 4065; VI-GISEL-NEXT: v_add_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 4066; VI-GISEL-NEXT: v_or_b32_e32 v2, v2, v3 4067; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 4068; VI-GISEL-NEXT: s_endpgm 4069; 4070; GFX9-LABEL: v_test_v2i16_x_add_neg_negfptwo: 4071; GFX9: ; %bb.0: 4072; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 4073; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 4074; GFX9-NEXT: s_waitcnt lgkmcnt(0) 4075; GFX9-NEXT: global_load_dword v1, v0, s[2:3] 4076; GFX9-NEXT: s_waitcnt vmcnt(0) 4077; GFX9-NEXT: v_pk_add_u16 v1, v1, -2.0 op_sel:[0,1] 4078; GFX9-NEXT: global_store_dword v0, v1, s[0:1] 4079; GFX9-NEXT: s_endpgm 4080; 4081; GFX10-LABEL: v_test_v2i16_x_add_neg_negfptwo: 4082; GFX10: ; %bb.0: 4083; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 4084; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 4085; GFX10-NEXT: s_waitcnt lgkmcnt(0) 4086; GFX10-NEXT: global_load_dword v1, v0, s[2:3] 4087; GFX10-NEXT: s_waitcnt vmcnt(0) 4088; GFX10-NEXT: v_pk_add_u16 v1, v1, -2.0 op_sel:[0,1] 4089; GFX10-NEXT: global_store_dword v0, v1, s[0:1] 4090; GFX10-NEXT: s_endpgm 4091; 4092; GFX11-LABEL: v_test_v2i16_x_add_neg_negfptwo: 4093; GFX11: ; %bb.0: 4094; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 4095; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 4096; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 4097; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 4098; GFX11-NEXT: s_waitcnt lgkmcnt(0) 4099; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] 4100; GFX11-NEXT: s_waitcnt vmcnt(0) 4101; GFX11-NEXT: v_pk_add_u16 v1, v1, -2.0 op_sel:[0,1] 4102; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 4103; GFX11-NEXT: s_endpgm 4104 %tid = call i32 
@llvm.amdgcn.workitem.id.x() 4105 %tid.ext = sext i32 %tid to i64 4106 %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext 4107 %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext 4108 %x = load <2 x i16>, ptr addrspace(1) %gep 4109 %result = add <2 x i16> %x, <i16 -16384, i16 -16384> 4110 store <2 x i16> %result, ptr addrspace(1) %gep.out 4111 ret void 4112} 4113 4114define amdgpu_kernel void @v_test_v2i16_x_add_undef_neg32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 4115; SI-SDAG-LABEL: v_test_v2i16_x_add_undef_neg32: 4116; SI-SDAG: ; %bb.0: 4117; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 4118; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 4119; SI-SDAG-NEXT: s_mov_b32 s6, 0 4120; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 4121; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 4122; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 4123; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 4124; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 4125; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 4126; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 4127; SI-SDAG-NEXT: v_and_b32_e32 v2, 0xffff0000, v2 4128; SI-SDAG-NEXT: v_add_i32_e32 v2, vcc, 0xffe00000, v2 4129; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 4130; SI-SDAG-NEXT: s_endpgm 4131; 4132; SI-GISEL-LABEL: v_test_v2i16_x_add_undef_neg32: 4133; SI-GISEL: ; %bb.0: 4134; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 4135; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 4136; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 4137; SI-GISEL-NEXT: s_mov_b32 s6, 0 4138; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 4139; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 4140; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 4141; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 4142; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 4143; SI-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v2 4144; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 0xffffffe0, v2 4145; SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 4146; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2 4147; 
SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 4148; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 4149; SI-GISEL-NEXT: s_endpgm 4150; 4151; VI-SDAG-LABEL: v_test_v2i16_x_add_undef_neg32: 4152; VI-SDAG: ; %bb.0: 4153; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 4154; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 4155; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 4156; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 4157; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 4158; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4159; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] 4160; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 4161; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 4162; VI-SDAG-NEXT: v_mov_b32_e32 v2, 32 4163; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4164; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 4165; VI-SDAG-NEXT: v_sub_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 4166; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 4167; VI-SDAG-NEXT: s_endpgm 4168; 4169; VI-GISEL-LABEL: v_test_v2i16_x_add_undef_neg32: 4170; VI-GISEL: ; %bb.0: 4171; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 4172; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 4173; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 4174; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 4175; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 4176; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 4177; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4178; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] 4179; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 4180; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 4181; VI-GISEL-NEXT: v_not_b32_e32 v2, 31 4182; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 4183; VI-GISEL-NEXT: s_and_b32 s0, 0xffff, s0 4184; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4185; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 4186; VI-GISEL-NEXT: v_add_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 4187; VI-GISEL-NEXT: v_or_b32_e32 v2, s0, v2 4188; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 4189; VI-GISEL-NEXT: 
s_endpgm 4190; 4191; GFX9-LABEL: v_test_v2i16_x_add_undef_neg32: 4192; GFX9: ; %bb.0: 4193; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 4194; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 4195; GFX9-NEXT: s_waitcnt lgkmcnt(0) 4196; GFX9-NEXT: global_load_dword v1, v0, s[2:3] 4197; GFX9-NEXT: s_waitcnt vmcnt(0) 4198; GFX9-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0] 4199; GFX9-NEXT: global_store_dword v0, v1, s[0:1] 4200; GFX9-NEXT: s_endpgm 4201; 4202; GFX10-LABEL: v_test_v2i16_x_add_undef_neg32: 4203; GFX10: ; %bb.0: 4204; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 4205; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 4206; GFX10-NEXT: s_waitcnt lgkmcnt(0) 4207; GFX10-NEXT: global_load_dword v1, v0, s[2:3] 4208; GFX10-NEXT: s_waitcnt vmcnt(0) 4209; GFX10-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0] 4210; GFX10-NEXT: global_store_dword v0, v1, s[0:1] 4211; GFX10-NEXT: s_endpgm 4212; 4213; GFX11-LABEL: v_test_v2i16_x_add_undef_neg32: 4214; GFX11: ; %bb.0: 4215; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 4216; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 4217; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 4218; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 4219; GFX11-NEXT: s_waitcnt lgkmcnt(0) 4220; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] 4221; GFX11-NEXT: s_waitcnt vmcnt(0) 4222; GFX11-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0] 4223; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 4224; GFX11-NEXT: s_endpgm 4225 %tid = call i32 @llvm.amdgcn.workitem.id.x() 4226 %tid.ext = sext i32 %tid to i64 4227 %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext 4228 %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext 4229 %x = load <2 x i16>, ptr addrspace(1) %gep 4230 %result = add <2 x i16> %x, <i16 undef, i16 -32> 4231 store <2 x i16> %result, ptr addrspace(1) %gep.out 4232 ret void 4233} 4234 4235define amdgpu_kernel void @v_test_v2i16_x_add_neg32_undef(ptr addrspace(1) %out, ptr addrspace(1) %in) 
#0 { 4236; SI-SDAG-LABEL: v_test_v2i16_x_add_neg32_undef: 4237; SI-SDAG: ; %bb.0: 4238; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 4239; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 4240; SI-SDAG-NEXT: s_mov_b32 s6, 0 4241; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 4242; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 4243; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 4244; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 4245; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 4246; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 4247; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 4248; SI-SDAG-NEXT: v_subrev_i32_e32 v2, vcc, 32, v2 4249; SI-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2 4250; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 4251; SI-SDAG-NEXT: s_endpgm 4252; 4253; SI-GISEL-LABEL: v_test_v2i16_x_add_neg32_undef: 4254; SI-GISEL: ; %bb.0: 4255; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 4256; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 4257; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 4258; SI-GISEL-NEXT: s_mov_b32 s6, 0 4259; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 4260; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 4261; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 4262; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 4263; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 4264; SI-GISEL-NEXT: v_add_i32_e32 v2, vcc, 0xffffffe0, v2 4265; SI-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 4266; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 4267; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 4268; SI-GISEL-NEXT: s_endpgm 4269; 4270; VI-SDAG-LABEL: v_test_v2i16_x_add_neg32_undef: 4271; VI-SDAG: ; %bb.0: 4272; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 4273; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 4274; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 4275; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 4276; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 4277; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4278; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] 4279; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 4280; VI-SDAG-NEXT: v_add_u32_e32 v0, 
vcc, s0, v2 4281; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4282; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 4283; VI-SDAG-NEXT: v_subrev_u16_e32 v2, 32, v3 4284; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 4285; VI-SDAG-NEXT: s_endpgm 4286; 4287; VI-GISEL-LABEL: v_test_v2i16_x_add_neg32_undef: 4288; VI-GISEL: ; %bb.0: 4289; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 4290; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 4291; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 4292; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 4293; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 4294; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 4295; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4296; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] 4297; VI-GISEL-NEXT: s_and_b32 s2, 0xffff, s0 4298; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 4299; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 4300; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 4301; VI-GISEL-NEXT: s_lshl_b32 s0, s2, 16 4302; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4303; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 4304; VI-GISEL-NEXT: v_add_u16_e32 v2, 0xffe0, v3 4305; VI-GISEL-NEXT: v_or_b32_e32 v2, s0, v2 4306; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 4307; VI-GISEL-NEXT: s_endpgm 4308; 4309; GFX9-SDAG-LABEL: v_test_v2i16_x_add_neg32_undef: 4310; GFX9-SDAG: ; %bb.0: 4311; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 4312; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 4313; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 4314; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3] 4315; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 4316; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, 32 4317; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1] 4318; GFX9-SDAG-NEXT: s_endpgm 4319; 4320; GFX9-GISEL-LABEL: v_test_v2i16_x_add_neg32_undef: 4321; GFX9-GISEL: ; %bb.0: 4322; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 4323; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 4324; GFX9-GISEL-NEXT: v_not_b32_e32 v2, 31 4325; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 4326; GFX9-GISEL-NEXT: global_load_dword v1, v0, 
s[2:3] 4327; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 4328; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v1, v2 4329; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1] 4330; GFX9-GISEL-NEXT: s_endpgm 4331; 4332; GFX10-SDAG-LABEL: v_test_v2i16_x_add_neg32_undef: 4333; GFX10-SDAG: ; %bb.0: 4334; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 4335; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 4336; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) 4337; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3] 4338; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 4339; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, 32 4340; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1] 4341; GFX10-SDAG-NEXT: s_endpgm 4342; 4343; GFX10-GISEL-LABEL: v_test_v2i16_x_add_neg32_undef: 4344; GFX10-GISEL: ; %bb.0: 4345; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 4346; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 4347; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) 4348; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3] 4349; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) 4350; GFX10-GISEL-NEXT: v_pk_add_u16 v1, 0xffffffe0, v1 4351; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1] 4352; GFX10-GISEL-NEXT: s_endpgm 4353; 4354; GFX11-SDAG-LABEL: v_test_v2i16_x_add_neg32_undef: 4355; GFX11-SDAG: ; %bb.0: 4356; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 4357; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 4358; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 4359; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 4360; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 4361; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] 4362; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 4363; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, 32 4364; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] 4365; GFX11-SDAG-NEXT: s_endpgm 4366; 4367; GFX11-GISEL-LABEL: v_test_v2i16_x_add_neg32_undef: 4368; GFX11-GISEL: ; %bb.0: 4369; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 4370; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 4371; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 4372; 
GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 4373; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 4374; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3] 4375; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 4376; GFX11-GISEL-NEXT: v_pk_add_u16 v1, 0xffffffe0, v1 4377; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] 4378; GFX11-GISEL-NEXT: s_endpgm 4379 %tid = call i32 @llvm.amdgcn.workitem.id.x() 4380 %tid.ext = sext i32 %tid to i64 4381 %gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext 4382 %gep.out = getelementptr inbounds <2 x i16>, ptr addrspace(1) %out, i64 %tid.ext 4383 %x = load <2 x i16>, ptr addrspace(1) %gep 4384 %result = add <2 x i16> %x, <i16 -32, i16 undef> 4385 store <2 x i16> %result, ptr addrspace(1) %gep.out 4386 ret void 4387} 4388 4389declare i32 @llvm.amdgcn.workitem.id.x() #1 4390 4391attributes #0 = { nounwind } 4392attributes #1 = { nounwind readnone } 4393