; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; RUN: llc -global-isel -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GISEL %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GCN-FAKE16 %s
; RUN: llc -global-isel -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL-FAKE16 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GCN-REAL16 %s
; RUN: llc -global-isel -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL-REAL16 %s

declare i32 @llvm.amdgcn.workitem.id.x() #1

; Two volatile half loads indexed by the workitem id, each followed by an
; fadd of 2.0; the results are inserted into a <2 x half>, bitcast to i32 and
; consumed by inline asm. The checks expect a single v_pack_b32_f16.
define amdgpu_kernel void @v_pack_b32_v2f16(ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 {
; GCN-LABEL: v_pack_b32_v2f16:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    global_load_ushort v1, v0, s[0:1] glc dlc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    global_load_ushort v2, v0, s[2:3] glc dlc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_add_f16_e32 v0, 2.0, v1
; GCN-NEXT:    v_add_f16_e32 v1, 2.0, v2
; GCN-NEXT:    v_pack_b32_f16 v0, v0, v1
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use v0
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    s_endpgm
;
; GISEL-LABEL: v_pack_b32_v2f16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-NEXT:    global_load_ushort v1, v0, s[0:1] glc dlc
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    global_load_ushort v2, v0, s[2:3] glc dlc
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    v_add_f16_e32 v0, 2.0, v1
; GISEL-NEXT:    v_add_f16_e32 v1, 2.0, v2
; GISEL-NEXT:    v_pack_b32_f16 v0, v0, v1
; GISEL-NEXT:    ;;#ASMSTART
; GISEL-NEXT:    ; use v0
; GISEL-NEXT:    ;;#ASMEND
; GISEL-NEXT:    s_endpgm
;
; GFX11-GCN-FAKE16-LABEL: v_pack_b32_v2f16:
; GFX11-GCN-FAKE16:       ; %bb.0:
; GFX11-GCN-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-GCN-FAKE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-GCN-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GCN-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GFX11-GCN-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GCN-FAKE16-NEXT:    global_load_u16 v1, v0, s[0:1] glc dlc
; GFX11-GCN-FAKE16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GCN-FAKE16-NEXT:    global_load_u16 v0, v0, s[2:3] glc dlc
; GFX11-GCN-FAKE16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GCN-FAKE16-NEXT:    v_add_f16_e32 v1, 2.0, v1
; GFX11-GCN-FAKE16-NEXT:    v_add_f16_e32 v0, 2.0, v0
; GFX11-GCN-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GCN-FAKE16-NEXT:    v_pack_b32_f16 v0, v1, v0
; GFX11-GCN-FAKE16-NEXT:    ;;#ASMSTART
; GFX11-GCN-FAKE16-NEXT:    ; use v0
; GFX11-GCN-FAKE16-NEXT:    ;;#ASMEND
; GFX11-GCN-FAKE16-NEXT:    s_endpgm
;
; GFX11-GISEL-FAKE16-LABEL: v_pack_b32_v2f16:
; GFX11-GISEL-FAKE16:       ; %bb.0:
; GFX11-GISEL-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-GISEL-FAKE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GISEL-FAKE16-NEXT:    global_load_u16 v1, v0, s[0:1] glc dlc
; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-FAKE16-NEXT:    global_load_u16 v0, v0, s[2:3] glc dlc
; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-FAKE16-NEXT:    v_add_f16_e32 v1, 2.0, v1
; GFX11-GISEL-FAKE16-NEXT:    v_add_f16_e32 v0, 2.0, v0
; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-FAKE16-NEXT:    v_pack_b32_f16 v0, v1, v0
; GFX11-GISEL-FAKE16-NEXT:    ;;#ASMSTART
; GFX11-GISEL-FAKE16-NEXT:    ; use v0
; GFX11-GISEL-FAKE16-NEXT:    ;;#ASMEND
; GFX11-GISEL-FAKE16-NEXT:    s_endpgm
;
; GFX11-GCN-REAL16-LABEL: v_pack_b32_v2f16:
; GFX11-GCN-REAL16:       ; %bb.0:
; GFX11-GCN-REAL16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-GCN-REAL16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-GCN-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GCN-REAL16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GFX11-GCN-REAL16-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GCN-REAL16-NEXT:    global_load_u16 v1, v0, s[0:1] glc dlc
; GFX11-GCN-REAL16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GCN-REAL16-NEXT:    global_load_u16 v2, v0, s[2:3] glc dlc
; GFX11-GCN-REAL16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GCN-REAL16-NEXT:    v_mov_b16_e32 v0.l, v1.l
; GFX11-GCN-REAL16-NEXT:    v_mov_b16_e32 v0.h, v2.l
; GFX11-GCN-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GCN-REAL16-NEXT:    v_add_f16_e32 v0.l, 2.0, v0.l
; GFX11-GCN-REAL16-NEXT:    v_add_f16_e32 v0.h, 2.0, v0.h
; GFX11-GCN-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GCN-REAL16-NEXT:    v_pack_b32_f16 v0, v0.l, v0.h
; GFX11-GCN-REAL16-NEXT:    ;;#ASMSTART
; GFX11-GCN-REAL16-NEXT:    ; use v0
; GFX11-GCN-REAL16-NEXT:    ;;#ASMEND
; GFX11-GCN-REAL16-NEXT:    s_endpgm
;
; GFX11-GISEL-REAL16-LABEL: v_pack_b32_v2f16:
; GFX11-GISEL-REAL16:       ; %bb.0:
; GFX11-GISEL-REAL16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-GISEL-REAL16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-GISEL-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-REAL16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GFX11-GISEL-REAL16-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GISEL-REAL16-NEXT:    global_load_u16 v1, v0, s[0:1] glc dlc
; GFX11-GISEL-REAL16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-REAL16-NEXT:    global_load_u16 v2, v0, s[2:3] glc dlc
; GFX11-GISEL-REAL16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-REAL16-NEXT:    v_add_f16_e32 v0.l, 2.0, v1.l
; GFX11-GISEL-REAL16-NEXT:    v_add_f16_e32 v0.h, 2.0, v2.l
; GFX11-GISEL-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-REAL16-NEXT:    v_pack_b32_f16 v0, v0.l, v0.h
; GFX11-GISEL-REAL16-NEXT:    ;;#ASMSTART
; GFX11-GISEL-REAL16-NEXT:    ; use v0
; GFX11-GISEL-REAL16-NEXT:    ;;#ASMEND
; GFX11-GISEL-REAL16-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in0.gep = getelementptr inbounds half, ptr addrspace(1) %in0, i64 %tid.ext
  %in1.gep = getelementptr inbounds half, ptr addrspace(1) %in1, i64 %tid.ext
  %v0 = load volatile half, ptr addrspace(1) %in0.gep
  %v1 = load volatile half, ptr addrspace(1) %in1.gep
  %v0.add = fadd half %v0, 2.0
  %v1.add = fadd half %v1, 2.0
  %vec.0 = insertelement <2 x half> undef, half %v0.add, i32 0
  %vec.1 = insertelement <2 x half> %vec.0, half %v1.add, i32 1
  %vec.i32 = bitcast <2 x half> %vec.1 to i32
  call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
  ret void
}

; Same shape as @v_pack_b32_v2f16, except element 0 is produced by an fsub of
; 2.0 instead of an fadd (the value keeps the name %v0.add).
define amdgpu_kernel void @v_pack_b32_v2f16_sub(ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 {
; GCN-LABEL: v_pack_b32_v2f16_sub:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    global_load_ushort v1, v0, s[0:1] glc dlc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    global_load_ushort v2, v0, s[2:3] glc dlc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_subrev_f16_e32 v0, 2.0, v1
; GCN-NEXT:    v_add_f16_e32 v1, 2.0, v2
; GCN-NEXT:    v_pack_b32_f16 v0, v0, v1
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use v0
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    s_endpgm
;
; GISEL-LABEL: v_pack_b32_v2f16_sub:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-NEXT:    global_load_ushort v1, v0, s[0:1] glc dlc
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    global_load_ushort v2, v0, s[2:3] glc dlc
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    v_subrev_f16_e32 v0, 2.0, v1
; GISEL-NEXT:    v_add_f16_e32 v1, 2.0, v2
; GISEL-NEXT:    v_pack_b32_f16 v0, v0, v1
; GISEL-NEXT:    ;;#ASMSTART
; GISEL-NEXT:    ; use v0
; GISEL-NEXT:    ;;#ASMEND
; GISEL-NEXT:    s_endpgm
;
; GFX11-GCN-FAKE16-LABEL: v_pack_b32_v2f16_sub:
; GFX11-GCN-FAKE16:       ; %bb.0:
; GFX11-GCN-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-GCN-FAKE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-GCN-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GCN-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GFX11-GCN-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GCN-FAKE16-NEXT:    global_load_u16 v1, v0, s[0:1] glc dlc
; GFX11-GCN-FAKE16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GCN-FAKE16-NEXT:    global_load_u16 v0, v0, s[2:3] glc dlc
; GFX11-GCN-FAKE16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GCN-FAKE16-NEXT:    v_subrev_f16_e32 v1, 2.0, v1
; GFX11-GCN-FAKE16-NEXT:    v_add_f16_e32 v0, 2.0, v0
; GFX11-GCN-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GCN-FAKE16-NEXT:    v_pack_b32_f16 v0, v1, v0
; GFX11-GCN-FAKE16-NEXT:    ;;#ASMSTART
; GFX11-GCN-FAKE16-NEXT:    ; use v0
; GFX11-GCN-FAKE16-NEXT:    ;;#ASMEND
; GFX11-GCN-FAKE16-NEXT:    s_endpgm
;
; GFX11-GISEL-FAKE16-LABEL: v_pack_b32_v2f16_sub:
; GFX11-GISEL-FAKE16:       ; %bb.0:
; GFX11-GISEL-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-GISEL-FAKE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GISEL-FAKE16-NEXT:    global_load_u16 v1, v0, s[0:1] glc dlc
; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-FAKE16-NEXT:    global_load_u16 v0, v0, s[2:3] glc dlc
; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-FAKE16-NEXT:    v_subrev_f16_e32 v1, 2.0, v1
; GFX11-GISEL-FAKE16-NEXT:    v_add_f16_e32 v0, 2.0, v0
; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-FAKE16-NEXT:    v_pack_b32_f16 v0, v1, v0
; GFX11-GISEL-FAKE16-NEXT:    ;;#ASMSTART
; GFX11-GISEL-FAKE16-NEXT:    ; use v0
; GFX11-GISEL-FAKE16-NEXT:    ;;#ASMEND
; GFX11-GISEL-FAKE16-NEXT:    s_endpgm
;
; GFX11-GCN-REAL16-LABEL: v_pack_b32_v2f16_sub:
; GFX11-GCN-REAL16:       ; %bb.0:
; GFX11-GCN-REAL16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-GCN-REAL16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-GCN-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GCN-REAL16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GFX11-GCN-REAL16-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GCN-REAL16-NEXT:    global_load_u16 v1, v0, s[0:1] glc dlc
; GFX11-GCN-REAL16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GCN-REAL16-NEXT:    global_load_u16 v2, v0, s[2:3] glc dlc
; GFX11-GCN-REAL16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GCN-REAL16-NEXT:    v_mov_b16_e32 v0.l, v1.l
; GFX11-GCN-REAL16-NEXT:    v_mov_b16_e32 v0.h, v2.l
; GFX11-GCN-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GCN-REAL16-NEXT:    v_subrev_f16_e32 v0.l, 2.0, v0.l
; GFX11-GCN-REAL16-NEXT:    v_add_f16_e32 v0.h, 2.0, v0.h
; GFX11-GCN-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GCN-REAL16-NEXT:    v_pack_b32_f16 v0, v0.l, v0.h
; GFX11-GCN-REAL16-NEXT:    ;;#ASMSTART
; GFX11-GCN-REAL16-NEXT:    ; use v0
; GFX11-GCN-REAL16-NEXT:    ;;#ASMEND
; GFX11-GCN-REAL16-NEXT:    s_endpgm
;
; GFX11-GISEL-REAL16-LABEL: v_pack_b32_v2f16_sub:
; GFX11-GISEL-REAL16:       ; %bb.0:
; GFX11-GISEL-REAL16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-GISEL-REAL16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-GISEL-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-REAL16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GFX11-GISEL-REAL16-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GISEL-REAL16-NEXT:    global_load_u16 v1, v0, s[0:1] glc dlc
; GFX11-GISEL-REAL16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-REAL16-NEXT:    global_load_u16 v2, v0, s[2:3] glc dlc
; GFX11-GISEL-REAL16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-REAL16-NEXT:    v_subrev_f16_e32 v0.l, 2.0, v1.l
; GFX11-GISEL-REAL16-NEXT:    v_add_f16_e32 v0.h, 2.0, v2.l
; GFX11-GISEL-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-REAL16-NEXT:    v_pack_b32_f16 v0, v0.l, v0.h
; GFX11-GISEL-REAL16-NEXT:    ;;#ASMSTART
; GFX11-GISEL-REAL16-NEXT:    ; use v0
; GFX11-GISEL-REAL16-NEXT:    ;;#ASMEND
; GFX11-GISEL-REAL16-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in0.gep = getelementptr inbounds half, ptr addrspace(1) %in0, i64 %tid.ext
  %in1.gep = getelementptr inbounds half, ptr addrspace(1) %in1, i64 %tid.ext
  %v0 = load volatile half, ptr addrspace(1) %in0.gep
  %v1 = load volatile half, ptr addrspace(1) %in1.gep
  %v0.add = fsub half %v0, 2.0
  %v1.add = fadd half %v1, 2.0
  %vec.0 = insertelement <2 x half> undef, half %v0.add, i32 0
  %vec.1 = insertelement <2 x half> %vec.0, half %v1.add, i32 1
  %vec.i32 = bitcast <2 x half> %vec.1 to i32
  call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
  ret void
}

; Loads a <2 x float> from %a, truncates it to <2 x half> and stores the result
; to %r. The checks expect two v_cvt_f16_f32 conversions feeding v_pack_b32_f16.
define amdgpu_kernel void @fptrunc(
; GCN-LABEL: fptrunc:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN-NEXT:    s_mov_b32 s6, -1
; GCN-NEXT:    s_mov_b32 s7, 0x31016000
; GCN-NEXT:    s_mov_b32 s10, s6
; GCN-NEXT:    s_mov_b32 s11, s7
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_mov_b32 s8, s2
; GCN-NEXT:    s_mov_b32 s9, s3
; GCN-NEXT:    s_mov_b32 s4, s0
; GCN-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
; GCN-NEXT:    s_mov_b32 s5, s1
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GCN-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GCN-NEXT:    v_pack_b32_f16 v0, v0, v1
; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GCN-NEXT:    s_endpgm
;
; GISEL-LABEL: fptrunc:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-NEXT:    v_cvt_f16_f32_e32 v0, s2
; GISEL-NEXT:    v_cvt_f16_f32_e32 v1, s3
; GISEL-NEXT:    s_mov_b32 s2, -1
; GISEL-NEXT:    s_mov_b32 s3, 0x31016000
; GISEL-NEXT:    v_pack_b32_f16 v0, v0, v1
; GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GISEL-NEXT:    s_endpgm
;
; GFX11-GCN-FAKE16-LABEL: fptrunc:
; GFX11-GCN-FAKE16:       ; %bb.0:
; GFX11-GCN-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-GCN-FAKE16-NEXT:    s_mov_b32 s6, -1
; GFX11-GCN-FAKE16-NEXT:    s_mov_b32 s7, 0x31016000
; GFX11-GCN-FAKE16-NEXT:    s_mov_b32 s10, s6
; GFX11-GCN-FAKE16-NEXT:    s_mov_b32 s11, s7
; GFX11-GCN-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GCN-FAKE16-NEXT:    s_mov_b32 s8, s2
; GFX11-GCN-FAKE16-NEXT:    s_mov_b32 s9, s3
; GFX11-GCN-FAKE16-NEXT:    s_mov_b32 s4, s0
; GFX11-GCN-FAKE16-NEXT:    buffer_load_b64 v[0:1], off, s[8:11], 0
; GFX11-GCN-FAKE16-NEXT:    s_mov_b32 s5, s1
; GFX11-GCN-FAKE16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GCN-FAKE16-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX11-GCN-FAKE16-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX11-GCN-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GCN-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX11-GCN-FAKE16-NEXT:    buffer_store_b32 v0, off, s[4:7], 0
; GFX11-GCN-FAKE16-NEXT:    s_endpgm
;
; GFX11-GISEL-FAKE16-LABEL: fptrunc:
; GFX11-GISEL-FAKE16:       ; %bb.0:
; GFX11-GISEL-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GISEL-FAKE16-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GISEL-FAKE16-NEXT:    v_cvt_f16_f32_e32 v0, s2
; GFX11-GISEL-FAKE16-NEXT:    v_cvt_f16_f32_e32 v1, s3
; GFX11-GISEL-FAKE16-NEXT:    s_mov_b32 s2, -1
; GFX11-GISEL-FAKE16-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX11-GISEL-FAKE16-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
; GFX11-GISEL-FAKE16-NEXT:    s_endpgm
;
; GFX11-GCN-REAL16-LABEL: fptrunc:
; GFX11-GCN-REAL16:       ; %bb.0:
; GFX11-GCN-REAL16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-GCN-REAL16-NEXT:    s_mov_b32 s6, -1
; GFX11-GCN-REAL16-NEXT:    s_mov_b32 s7, 0x31016000
; GFX11-GCN-REAL16-NEXT:    s_mov_b32 s10, s6
; GFX11-GCN-REAL16-NEXT:    s_mov_b32 s11, s7
; GFX11-GCN-REAL16-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GCN-REAL16-NEXT:    s_mov_b32 s8, s2
; GFX11-GCN-REAL16-NEXT:    s_mov_b32 s9, s3
; GFX11-GCN-REAL16-NEXT:    s_mov_b32 s4, s0
; GFX11-GCN-REAL16-NEXT:    buffer_load_b64 v[1:2], off, s[8:11], 0
; GFX11-GCN-REAL16-NEXT:    s_mov_b32 s5, s1
; GFX11-GCN-REAL16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GCN-REAL16-NEXT:    v_cvt_f16_f32_e32 v0.l, v2
; GFX11-GCN-REAL16-NEXT:    v_cvt_f16_f32_e32 v0.h, v1
; GFX11-GCN-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GCN-REAL16-NEXT:    v_pack_b32_f16 v0, v0.h, v0.l
; GFX11-GCN-REAL16-NEXT:    buffer_store_b32 v0, off, s[4:7], 0
; GFX11-GCN-REAL16-NEXT:    s_endpgm
;
; GFX11-GISEL-REAL16-LABEL: fptrunc:
; GFX11-GISEL-REAL16:       ; %bb.0:
; GFX11-GISEL-REAL16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-GISEL-REAL16-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GISEL-REAL16-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
; GFX11-GISEL-REAL16-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GISEL-REAL16-NEXT:    v_cvt_f16_f32_e32 v0.l, s2
; GFX11-GISEL-REAL16-NEXT:    v_cvt_f16_f32_e32 v0.h, s3
; GFX11-GISEL-REAL16-NEXT:    s_mov_b32 s2, -1
; GFX11-GISEL-REAL16-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-GISEL-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-REAL16-NEXT:    v_pack_b32_f16 v0, v0.l, v0.h
; GFX11-GISEL-REAL16-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
; GFX11-GISEL-REAL16-NEXT:    s_endpgm
    ptr addrspace(1) %r,
    ptr addrspace(1) %a) {
  %a.val = load <2 x float>, ptr addrspace(1) %a
  %r.val = fptrunc <2 x float> %a.val to <2 x half>
  store <2 x half> %r.val, ptr addrspace(1) %r
  ret void
}

; Same shape as @v_pack_b32_v2f16, with llvm.fabs.f16 applied to both sums
; before packing. The checks expect the fabs to appear as |...| source
; modifiers on v_pack_b32_f16.
define amdgpu_kernel void @v_pack_b32.fabs(ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 {
; GCN-LABEL: v_pack_b32.fabs:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    global_load_ushort v1, v0, s[0:1] glc dlc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    global_load_ushort v2, v0, s[2:3] glc dlc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_add_f16_e32 v0, 2.0, v1
; GCN-NEXT:    v_add_f16_e32 v1, 2.0, v2
; GCN-NEXT:    v_pack_b32_f16 v0, |v0|, |v1|
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use v0
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    s_endpgm
;
; GISEL-LABEL: v_pack_b32.fabs:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-NEXT:    global_load_ushort v1, v0, s[0:1] glc dlc
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    global_load_ushort v2, v0, s[2:3] glc dlc
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    v_add_f16_e32 v0, 2.0, v1
; GISEL-NEXT:    v_add_f16_e32 v1, 2.0, v2
; GISEL-NEXT:    v_pack_b32_f16 v0, |v0|, |v1|
; GISEL-NEXT:    ;;#ASMSTART
; GISEL-NEXT:    ; use v0
; GISEL-NEXT:    ;;#ASMEND
; GISEL-NEXT:    s_endpgm
;
; GFX11-GCN-FAKE16-LABEL: v_pack_b32.fabs:
; GFX11-GCN-FAKE16:       ; %bb.0:
; GFX11-GCN-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-GCN-FAKE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-GCN-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GCN-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GFX11-GCN-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GCN-FAKE16-NEXT:    global_load_u16 v1, v0, s[0:1] glc dlc
; GFX11-GCN-FAKE16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GCN-FAKE16-NEXT:    global_load_u16 v0, v0, s[2:3] glc dlc
; GFX11-GCN-FAKE16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GCN-FAKE16-NEXT:    v_add_f16_e32 v1, 2.0, v1
; GFX11-GCN-FAKE16-NEXT:    v_add_f16_e32 v0, 2.0, v0
; GFX11-GCN-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GCN-FAKE16-NEXT:    v_pack_b32_f16 v0, |v1|, |v0|
; GFX11-GCN-FAKE16-NEXT:    ;;#ASMSTART
; GFX11-GCN-FAKE16-NEXT:    ; use v0
; GFX11-GCN-FAKE16-NEXT:    ;;#ASMEND
; GFX11-GCN-FAKE16-NEXT:    s_endpgm
;
; GFX11-GISEL-FAKE16-LABEL: v_pack_b32.fabs:
; GFX11-GISEL-FAKE16:       ; %bb.0:
; GFX11-GISEL-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-GISEL-FAKE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GISEL-FAKE16-NEXT:    global_load_u16 v1, v0, s[0:1] glc dlc
; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-FAKE16-NEXT:    global_load_u16 v0, v0, s[2:3] glc dlc
; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-FAKE16-NEXT:    v_add_f16_e32 v1, 2.0, v1
; GFX11-GISEL-FAKE16-NEXT:    v_add_f16_e32 v0, 2.0, v0
; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-FAKE16-NEXT:    v_pack_b32_f16 v0, |v1|, |v0|
; GFX11-GISEL-FAKE16-NEXT:    ;;#ASMSTART
; GFX11-GISEL-FAKE16-NEXT:    ; use v0
; GFX11-GISEL-FAKE16-NEXT:    ;;#ASMEND
; GFX11-GISEL-FAKE16-NEXT:    s_endpgm
;
; GFX11-GCN-REAL16-LABEL: v_pack_b32.fabs:
; GFX11-GCN-REAL16:       ; %bb.0:
; GFX11-GCN-REAL16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-GCN-REAL16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-GCN-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GCN-REAL16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GFX11-GCN-REAL16-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GCN-REAL16-NEXT:    global_load_u16 v1, v0, s[0:1] glc dlc
; GFX11-GCN-REAL16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GCN-REAL16-NEXT:    global_load_u16 v2, v0, s[2:3] glc dlc
; GFX11-GCN-REAL16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GCN-REAL16-NEXT:    v_mov_b16_e32 v0.l, v1.l
; GFX11-GCN-REAL16-NEXT:    v_mov_b16_e32 v0.h, v2.l
; GFX11-GCN-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GCN-REAL16-NEXT:    v_add_f16_e32 v0.l, 2.0, v0.l
; GFX11-GCN-REAL16-NEXT:    v_add_f16_e32 v0.h, 2.0, v0.h
; GFX11-GCN-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GCN-REAL16-NEXT:    v_pack_b32_f16 v0, |v0.l|, |v0.h|
; GFX11-GCN-REAL16-NEXT:    ;;#ASMSTART
; GFX11-GCN-REAL16-NEXT:    ; use v0
; GFX11-GCN-REAL16-NEXT:    ;;#ASMEND
; GFX11-GCN-REAL16-NEXT:    s_endpgm
;
; GFX11-GISEL-REAL16-LABEL: v_pack_b32.fabs:
; GFX11-GISEL-REAL16:       ; %bb.0:
; GFX11-GISEL-REAL16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-GISEL-REAL16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-GISEL-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-REAL16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GFX11-GISEL-REAL16-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GISEL-REAL16-NEXT:    global_load_u16 v1, v0, s[0:1] glc dlc
; GFX11-GISEL-REAL16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-REAL16-NEXT:    global_load_u16 v2, v0, s[2:3] glc dlc
; GFX11-GISEL-REAL16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-REAL16-NEXT:    v_add_f16_e32 v0.l, 2.0, v1.l
; GFX11-GISEL-REAL16-NEXT:    v_add_f16_e32 v0.h, 2.0, v2.l
; GFX11-GISEL-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-REAL16-NEXT:    v_pack_b32_f16 v0, |v0.l|, |v0.h|
; GFX11-GISEL-REAL16-NEXT:    ;;#ASMSTART
; GFX11-GISEL-REAL16-NEXT:    ; use v0
; GFX11-GISEL-REAL16-NEXT:    ;;#ASMEND
; GFX11-GISEL-REAL16-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in0.gep = getelementptr inbounds half, ptr addrspace(1) %in0, i64 %tid.ext
  %in1.gep = getelementptr inbounds half, ptr addrspace(1) %in1, i64 %tid.ext
  %v0 = load volatile half, ptr addrspace(1) %in0.gep
  %v1 = load volatile half, ptr addrspace(1) %in1.gep
  %v0.add = fadd half %v0, 2.0
  %v1.add = fadd half %v1, 2.0
  %v0.fabs = call half @llvm.fabs.f16(half %v0.add)
  %v1.fabs = call half @llvm.fabs.f16(half %v1.add)
  %vec.0 = insertelement <2 x half> undef, half %v0.fabs, i32 0
  %vec.1 = insertelement <2 x half> %vec.0, half %v1.fabs, i32 1
  %vec.i32 = bitcast <2 x half> %vec.1 to i32
  call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
  ret void
}

; Same shape as @v_pack_b32_v2f16, with both sums negated (written as
; fsub -0.0, x) before packing. The checks expect the negation to appear as
; "-" source modifiers on v_pack_b32_f16.
define amdgpu_kernel void @v_pack_b32.fneg(ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 {
; GCN-LABEL: v_pack_b32.fneg:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    global_load_ushort v1, v0, s[0:1] glc dlc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    global_load_ushort v2, v0, s[2:3] glc dlc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_add_f16_e32 v0, 2.0, v1
; GCN-NEXT:    v_add_f16_e32 v1, 2.0, v2
; GCN-NEXT:    v_pack_b32_f16 v0, -v0, -v1
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use v0
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    s_endpgm
;
; GISEL-LABEL: v_pack_b32.fneg:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GISEL-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-NEXT:    global_load_ushort v1, v0, s[0:1] glc dlc
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    global_load_ushort v2, v0, s[2:3] glc dlc
; GISEL-NEXT:    s_waitcnt vmcnt(0)
; GISEL-NEXT:    v_add_f16_e32 v0, 2.0, v1
; GISEL-NEXT:    v_add_f16_e32 v1, 2.0, v2
; GISEL-NEXT:    v_pack_b32_f16 v0, -v0, -v1
; GISEL-NEXT:    ;;#ASMSTART
; GISEL-NEXT:    ; use v0
; GISEL-NEXT:    ;;#ASMEND
; GISEL-NEXT:    s_endpgm
;
; GFX11-GCN-FAKE16-LABEL: v_pack_b32.fneg:
; GFX11-GCN-FAKE16:       ; %bb.0:
; GFX11-GCN-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-GCN-FAKE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-GCN-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GCN-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GFX11-GCN-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GCN-FAKE16-NEXT:    global_load_u16 v1, v0, s[0:1] glc dlc
; GFX11-GCN-FAKE16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GCN-FAKE16-NEXT:    global_load_u16 v0, v0, s[2:3] glc dlc
; GFX11-GCN-FAKE16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GCN-FAKE16-NEXT:    v_add_f16_e32 v1, 2.0, v1
; GFX11-GCN-FAKE16-NEXT:    v_add_f16_e32 v0, 2.0, v0
; GFX11-GCN-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GCN-FAKE16-NEXT:    v_pack_b32_f16 v0, -v1, -v0
; GFX11-GCN-FAKE16-NEXT:    ;;#ASMSTART
; GFX11-GCN-FAKE16-NEXT:    ; use v0
; GFX11-GCN-FAKE16-NEXT:    ;;#ASMEND
; GFX11-GCN-FAKE16-NEXT:    s_endpgm
;
; GFX11-GISEL-FAKE16-LABEL: v_pack_b32.fneg:
; GFX11-GISEL-FAKE16:       ; %bb.0:
; GFX11-GISEL-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-GISEL-FAKE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GISEL-FAKE16-NEXT:    global_load_u16 v1, v0, s[0:1] glc dlc
; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-FAKE16-NEXT:    global_load_u16 v0, v0, s[2:3] glc dlc
; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-FAKE16-NEXT:    v_add_f16_e32 v1, 2.0, v1
; GFX11-GISEL-FAKE16-NEXT:    v_add_f16_e32 v0, 2.0, v0
; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-FAKE16-NEXT:    v_pack_b32_f16 v0, -v1, -v0
; GFX11-GISEL-FAKE16-NEXT:    ;;#ASMSTART
; GFX11-GISEL-FAKE16-NEXT:    ; use v0
; GFX11-GISEL-FAKE16-NEXT:    ;;#ASMEND
; GFX11-GISEL-FAKE16-NEXT:    s_endpgm
;
; GFX11-GCN-REAL16-LABEL: v_pack_b32.fneg:
; GFX11-GCN-REAL16:       ; %bb.0:
; GFX11-GCN-REAL16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-GCN-REAL16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-GCN-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GCN-REAL16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GFX11-GCN-REAL16-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GCN-REAL16-NEXT:    global_load_u16 v1, v0, s[0:1] glc dlc
; GFX11-GCN-REAL16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GCN-REAL16-NEXT:    global_load_u16 v2, v0, s[2:3] glc dlc
; GFX11-GCN-REAL16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GCN-REAL16-NEXT:    v_mov_b16_e32 v0.l, v1.l
; GFX11-GCN-REAL16-NEXT:    v_mov_b16_e32 v0.h, v2.l
; GFX11-GCN-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GCN-REAL16-NEXT:    v_add_f16_e32 v0.l, 2.0, v0.l
; GFX11-GCN-REAL16-NEXT:    v_add_f16_e32 v0.h, 2.0, v0.h
; GFX11-GCN-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GCN-REAL16-NEXT:    v_pack_b32_f16 v0, -v0.l, -v0.h
; GFX11-GCN-REAL16-NEXT:    ;;#ASMSTART
; GFX11-GCN-REAL16-NEXT:    ; use v0
; GFX11-GCN-REAL16-NEXT:    ;;#ASMEND
; GFX11-GCN-REAL16-NEXT:    s_endpgm
;
; GFX11-GISEL-REAL16-LABEL: v_pack_b32.fneg:
; GFX11-GISEL-REAL16:       ; %bb.0:
; GFX11-GISEL-REAL16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-GISEL-REAL16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-GISEL-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-REAL16-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; GFX11-GISEL-REAL16-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GISEL-REAL16-NEXT:    global_load_u16 v1, v0, s[0:1] glc dlc
; GFX11-GISEL-REAL16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-REAL16-NEXT:    global_load_u16 v2, v0, s[2:3] glc dlc
; GFX11-GISEL-REAL16-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-REAL16-NEXT:    v_add_f16_e32 v0.l, 2.0, v1.l
; GFX11-GISEL-REAL16-NEXT:    v_add_f16_e32 v0.h, 2.0, v2.l
; GFX11-GISEL-REAL16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-REAL16-NEXT:    v_pack_b32_f16 v0, -v0.l, -v0.h
; GFX11-GISEL-REAL16-NEXT:    ;;#ASMSTART
; GFX11-GISEL-REAL16-NEXT:    ; use v0
; GFX11-GISEL-REAL16-NEXT:    ;;#ASMEND
; GFX11-GISEL-REAL16-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in0.gep = getelementptr inbounds half, ptr addrspace(1) %in0, i64 %tid.ext
  %in1.gep = getelementptr inbounds half, ptr addrspace(1) %in1, i64 %tid.ext
  %v0 = load volatile half, ptr addrspace(1) %in0.gep
  %v1 = load volatile half, ptr addrspace(1) %in1.gep
  %v0.add = fadd half %v0, 2.0
  %v1.add = fadd half %v1, 2.0
  %v0.fneg = fsub half -0.0, %v0.add
  %v1.fneg = fsub half -0.0, %v1.add
  %vec.0 = insertelement <2 x half> undef, half %v0.fneg, i32 0
  %vec.1 = insertelement <2 x half> %vec.0, half %v1.fneg, i32 1
  %vec.i32 = bitcast <2 x half> %vec.1 to i32
  call void asm sideeffect "; use $0", "v"(i32 %vec.i32) #0
  ret void
}

declare half @llvm.fabs.f16(half) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }