; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s

; Signed-integer to double conversion tests for amdgcn. The two llc
; invocations above compile this file for hawaii and for fiji; assertions that
; are identical for both targets use the shared GCN prefix, the rest use the
; per-target prefix named in the matching invocation.
; The assertion lines are generated; regenerate them with the script named on
; the first line instead of editing them by hand.

declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone

; i32 kernel argument converted to double and stored to %out. Both targets are
; expected to emit a single v_cvt_f64_i32 that reads the loaded SGPR directly.
define amdgpu_kernel void @sint_to_fp_i32_to_f64(ptr addrspace(1) %out, i32 %in) {
; CI-LABEL: sint_to_fp_i32_to_f64:
; CI:       ; %bb.0:
; CI-NEXT:    s_load_dword s2, s[8:9], 0x2
; CI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    v_cvt_f64_i32_e32 v[0:1], s2
; CI-NEXT:    v_mov_b32_e32 v3, s1
; CI-NEXT:    v_mov_b32_e32 v2, s0
; CI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; CI-NEXT:    s_endpgm
;
; VI-LABEL: sint_to_fp_i32_to_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[8:9], 0x8
; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cvt_f64_i32_e32 v[0:1], s2
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
  %result = sitofp i32 %in to double
  store double %result, ptr addrspace(1) %out
  ret void
}

; We can't fold the SGPRs into v_cndmask_b32_e64, because it already
; uses an SGPR (implicit vcc).
; NOTE(review): the assertions for the function that follows expect
; s_cmp_eq_u32 + s_cselect_b32 and contain no v_cndmask_b32_e64, so the remark
; above looks stale -- confirm, then reword or remove it.
; sitofp of an i1 produced by "icmp eq %in, 0". The expected code does no
; conversion at all: it selects the high dword of the result (0xbff00000, the
; upper half of the -1.0 bit pattern, or 0) with s_cselect_b32 and pairs it
; with a zero low dword. Note the store is only 4-byte aligned.
define amdgpu_kernel void @sint_to_fp_i1_f64(ptr addrspace(1) %out, i32 %in) {
; CI-LABEL: sint_to_fp_i1_f64:
; CI:       ; %bb.0:
; CI-NEXT:    s_load_dword s2, s[8:9], 0x2
; CI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; CI-NEXT:    v_mov_b32_e32 v0, 0
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    s_cmp_eq_u32 s2, 0
; CI-NEXT:    s_cselect_b32 s2, 0xbff00000, 0
; CI-NEXT:    v_mov_b32_e32 v3, s1
; CI-NEXT:    v_mov_b32_e32 v1, s2
; CI-NEXT:    v_mov_b32_e32 v2, s0
; CI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; CI-NEXT:    s_endpgm
;
; VI-LABEL: sint_to_fp_i1_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[8:9], 0x8
; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_eq_u32 s2, 0
; VI-NEXT:    s_cselect_b32 s2, 0xbff00000, 0
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    v_mov_b32_e32 v1, s2
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
  %cmp = icmp eq i32 %in, 0
  %fp = sitofp i1 %cmp to double
  store double %fp, ptr addrspace(1) %out, align 4
  ret void
}

; sitofp of an i1 that arrives directly as a kernel argument. Here the expected
; code tests bit 0 of the loaded dword, materialises 0 / -1 with v_cndmask and
; then runs a real v_cvt_f64_i32 on that value.
define amdgpu_kernel void @sint_to_fp_i1_f64_load(ptr addrspace(1) %out, i1 %in) {
; CI-LABEL: sint_to_fp_i1_f64_load:
; CI:       ; %bb.0:
; CI-NEXT:    s_load_dword s2, s[8:9], 0x2
; CI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    s_bitcmp1_b32 s2, 0
; CI-NEXT:    s_cselect_b64 s[2:3], -1, 0
; CI-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[2:3]
; CI-NEXT:    v_cvt_f64_i32_e32 v[0:1], v0
; CI-NEXT:    v_mov_b32_e32 v3, s1
; CI-NEXT:    v_mov_b32_e32 v2, s0
; CI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; CI-NEXT:    s_endpgm
;
; VI-LABEL: sint_to_fp_i1_f64_load:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[8:9], 0x8
; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_bitcmp1_b32 s2, 0
; VI-NEXT:    s_cselect_b64 s[2:3], -1, 0
; VI-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[2:3]
; VI-NEXT:    v_cvt_f64_i32_e32 v[0:1], v0
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
  %fp = sitofp i1 %in to double
  store double %fp, ptr addrspace(1) %out, align 8
  ret void
}

; i64 kernel argument (value held in SGPRs). The expected expansion converts
; the high dword as signed and the low dword as unsigned, scales the high part
; with v_ldexp_f64 by 32 and adds the two doubles. The two targets differ only
; in where the address moves are scheduled.
define amdgpu_kernel void @s_sint_to_fp_i64_to_f64(ptr addrspace(1) %out, i64 %in) {
; CI-LABEL: s_sint_to_fp_i64_to_f64:
; CI:       ; %bb.0:
; CI-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    v_cvt_f64_i32_e32 v[0:1], s3
; CI-NEXT:    v_cvt_f64_u32_e32 v[2:3], s2
; CI-NEXT:    v_mov_b32_e32 v4, s0
; CI-NEXT:    v_mov_b32_e32 v5, s1
; CI-NEXT:    v_ldexp_f64 v[0:1], v[0:1], 32
; CI-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
; CI-NEXT:    flat_store_dwordx2 v[4:5], v[0:1]
; CI-NEXT:    s_endpgm
;
; VI-LABEL: s_sint_to_fp_i64_to_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cvt_f64_i32_e32 v[0:1], s3
; VI-NEXT:    v_cvt_f64_u32_e32 v[2:3], s2
; VI-NEXT:    v_ldexp_f64 v[0:1], v[0:1], 32
; VI-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
  %result = sitofp i64 %in to double
  store double %result, ptr addrspace(1) %out
  ret void
}

; Same i64 conversion, but the source value is loaded from %in at an index
; taken from the workitem id, so the operand lives in VGPRs. The expected
; expansion is the same cvt / cvt / ldexp / add sequence as the SGPR variant.
define amdgpu_kernel void @v_sint_to_fp_i64_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; CI-LABEL: v_sint_to_fp_i64_to_f64:
; CI:       ; %bb.0:
; CI-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; CI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    v_mov_b32_e32 v1, s3
; CI-NEXT:    v_add_i32_e32 v0, vcc, s2, v0
; CI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; CI-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    v_cvt_f64_i32_e32 v[1:2], v1
; CI-NEXT:    v_cvt_f64_u32_e32 v[3:4], v0
; CI-NEXT:    v_ldexp_f64 v[0:1], v[1:2], 32
; CI-NEXT:    v_mov_b32_e32 v2, s0
; CI-NEXT:    v_add_f64 v[0:1], v[0:1], v[3:4]
; CI-NEXT:    v_mov_b32_e32 v3, s1
; CI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; CI-NEXT:    s_endpgm
;
; VI-LABEL: v_sint_to_fp_i64_to_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; VI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_cvt_f64_i32_e32 v[1:2], v1
; VI-NEXT:    v_cvt_f64_u32_e32 v[3:4], v0
; VI-NEXT:    v_ldexp_f64 v[1:2], v[1:2], 32
; VI-NEXT:    v_add_f64 v[0:1], v[1:2], v[3:4]
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  %gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid
  %val = load i64, ptr addrspace(1) %gep, align 8
  %result = sitofp i64 %val to double
  store double %result, ptr addrspace(1) %out
  ret void
}

; FIXME: bfe and sext on VI+
; i8 kernel argument. The hawaii output sign-extends once (s_sext_i32_i8); the
; fiji output currently contains both an 8-bit s_bfe_i32 and a following
; s_sext_i32_i16, which is the redundancy the FIXME above refers to.
define amdgpu_kernel void @s_sint_to_fp_i8_to_f64(ptr addrspace(1) %out, i8 %in) {
; CI-LABEL: s_sint_to_fp_i8_to_f64:
; CI:       ; %bb.0:
; CI-NEXT:    s_load_dword s2, s[8:9], 0x2
; CI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    s_sext_i32_i8 s2, s2
; CI-NEXT:    v_cvt_f64_i32_e32 v[0:1], s2
; CI-NEXT:    v_mov_b32_e32 v3, s1
; CI-NEXT:    v_mov_b32_e32 v2, s0
; CI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; CI-NEXT:    s_endpgm
;
; VI-LABEL: s_sint_to_fp_i8_to_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[8:9], 0x8
; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_bfe_i32 s2, s2, 0x80000
; VI-NEXT:    s_sext_i32_i16 s2, s2
; VI-NEXT:    v_cvt_f64_i32_e32 v[0:1], s2
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
  %fp = sitofp i8 %in to double
  store double %fp, ptr addrspace(1) %out
  ret void
}

; Non-kernel variant of the i8 conversion: the argument arrives in v0 and the
; result is returned. The fiji output again carries two sign-extending
; bitfield extracts (8 bits, then 16 bits) where hawaii needs only one.
define double @v_sint_to_fp_i8_to_f64(i8 %in) {
; CI-LABEL: v_sint_to_fp_i8_to_f64:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_bfe_i32 v0, v0, 0, 8
; CI-NEXT:    v_cvt_f64_i32_e32 v[0:1], v0
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_sint_to_fp_i8_to_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_bfe_i32 v0, v0, 0, 8
; VI-NEXT:    v_bfe_i32 v0, v0, 0, 16
; VI-NEXT:    v_cvt_f64_i32_e32 v[0:1], v0
; VI-NEXT:    s_setpc_b64 s[30:31]
  %fp = sitofp i8 %in to double
  ret double %fp
}

; Explicit select between -1.0 and 0.0 on the same compare used by
; sint_to_fp_i1_f64. The expected instruction sequence is identical to that
; test: one s_cselect_b32 on the high dword, zero low dword.
define amdgpu_kernel void @s_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
; CI-LABEL: s_select_sint_to_fp_i1_vals_f64:
; CI:       ; %bb.0:
; CI-NEXT:    s_load_dword s2, s[8:9], 0x2
; CI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; CI-NEXT:    v_mov_b32_e32 v0, 0
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    s_cmp_eq_u32 s2, 0
; CI-NEXT:    s_cselect_b32 s2, 0xbff00000, 0
; CI-NEXT:    v_mov_b32_e32 v3, s1
; CI-NEXT:    v_mov_b32_e32 v1, s2
; CI-NEXT:    v_mov_b32_e32 v2, s0
; CI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; CI-NEXT:    s_endpgm
;
; VI-LABEL: s_select_sint_to_fp_i1_vals_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[8:9], 0x8
; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_eq_u32 s2, 0
; VI-NEXT:    s_cselect_b32 s2, 0xbff00000, 0
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    v_mov_b32_e32 v1, s2
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
  %cmp = icmp eq i32 %in, 0
  %select = select i1 %cmp, double -1.0, double 0.0
  store double %select, ptr addrspace(1) %out, align 8
  ret void
}

; Non-kernel (VGPR operand) variant of the -1.0 / 0.0 select. Output is the
; same on both targets, so the shared prefix is used: compare into vcc, then a
; v_cndmask_b32_e32 on the high dword only.
define void @v_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
; GCN-LABEL: v_select_sint_to_fp_i1_vals_f64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v4, 0xbff00000
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
; GCN-NEXT:    v_mov_b32_e32 v3, 0
; GCN-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
; GCN-NEXT:    flat_store_dwordx2 v[0:1], v[3:4]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %cmp = icmp eq i32 %in, 0
  %select = select i1 %cmp, double -1.0, double 0.0
  store double %select, ptr addrspace(1) %out, align 8
  ret void
}

; Integer twin of s_select_sint_to_fp_i1_vals_f64: the select operands are the
; i64 bit pattern 0xbff0000000000000 and 0. The expected code matches the
; floating-point version.
define amdgpu_kernel void @s_select_sint_to_fp_i1_vals_i64(ptr addrspace(1) %out, i32 %in) {
; CI-LABEL: s_select_sint_to_fp_i1_vals_i64:
; CI:       ; %bb.0:
; CI-NEXT:    s_load_dword s2, s[8:9], 0x2
; CI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; CI-NEXT:    v_mov_b32_e32 v0, 0
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    s_cmp_eq_u32 s2, 0
; CI-NEXT:    s_cselect_b32 s2, 0xbff00000, 0
; CI-NEXT:    v_mov_b32_e32 v3, s1
; CI-NEXT:    v_mov_b32_e32 v1, s2
; CI-NEXT:    v_mov_b32_e32 v2, s0
; CI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; CI-NEXT:    s_endpgm
;
; VI-LABEL: s_select_sint_to_fp_i1_vals_i64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[8:9], 0x8
; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_eq_u32 s2, 0
; VI-NEXT:    s_cselect_b32 s2, 0xbff00000, 0
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    v_mov_b32_e32 v1, s2
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
  %cmp = icmp eq i32 %in, 0
  %select = select i1 %cmp, i64 u0xbff0000000000000, i64 0
  store i64 %select, ptr addrspace(1) %out, align 8
  ret void
}

; Non-kernel (VGPR operand) variant of the i64 bit-pattern select; expected
; output matches v_select_sint_to_fp_i1_vals_f64.
define void @v_select_sint_to_fp_i1_vals_i64(ptr addrspace(1) %out, i32 %in) {
; GCN-LABEL: v_select_sint_to_fp_i1_vals_i64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v4, 0xbff00000
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
; GCN-NEXT:    v_mov_b32_e32 v3, 0
; GCN-NEXT:    v_cndmask_b32_e32 v4, 0, v4, vcc
; GCN-NEXT:    flat_store_dwordx2 v[0:1], v[3:4]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %cmp = icmp eq i32 %in, 0
  %select = select i1 %cmp, i64 u0xbff0000000000000, i64 0
  store i64 %select, ptr addrspace(1) %out, align 8
  ret void
}

; TODO: This should swap the selected order / invert the compare and do it.
; Select operands are reversed (0.0 when the compare is true, -1.0 otherwise).
; The expected code keeps the compare as-is and uses the e64 encoding of
; v_cndmask with the operands in the opposite order.
define void @v_swap_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
; GCN-LABEL: v_swap_select_sint_to_fp_i1_vals_f64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v4, 0xbff00000
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
; GCN-NEXT:    v_mov_b32_e32 v3, 0
; GCN-NEXT:    v_cndmask_b32_e64 v4, v4, 0, vcc
; GCN-NEXT:    flat_store_dwordx2 v[0:1], v[3:4]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %cmp = icmp eq i32 %in, 0
  %select = select i1 %cmp, double 0.0, double -1.0
  store double %select, ptr addrspace(1) %out, align 8
  ret void
}

; TODO: This should swap the selected order / invert the compare and do it.
; Kernel (SGPR operand) variant of the reversed select: 0.0 when %in == 0,
; -1.0 otherwise. The expected code keeps the equality compare and simply
; reverses the two s_cselect_b32 operands (0, then 0xbff00000) for the high
; dword; the low dword is a constant zero.
define amdgpu_kernel void @s_swap_select_sint_to_fp_i1_vals_f64(ptr addrspace(1) %out, i32 %in) {
; CI-LABEL: s_swap_select_sint_to_fp_i1_vals_f64:
; CI:       ; %bb.0:
; CI-NEXT:    s_load_dword s2, s[8:9], 0x2
; CI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; CI-NEXT:    v_mov_b32_e32 v0, 0
; CI-NEXT:    s_waitcnt lgkmcnt(0)
; CI-NEXT:    s_cmp_eq_u32 s2, 0
; CI-NEXT:    s_cselect_b32 s2, 0, 0xbff00000
; CI-NEXT:    v_mov_b32_e32 v3, s1
; CI-NEXT:    v_mov_b32_e32 v1, s2
; CI-NEXT:    v_mov_b32_e32 v2, s0
; CI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; CI-NEXT:    s_endpgm
;
; VI-LABEL: s_swap_select_sint_to_fp_i1_vals_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[8:9], 0x8
; VI-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_cmp_eq_u32 s2, 0
; VI-NEXT:    s_cselect_b32 s2, 0, 0xbff00000
; VI-NEXT:    v_mov_b32_e32 v3, s1
; VI-NEXT:    v_mov_b32_e32 v1, s2
; VI-NEXT:    v_mov_b32_e32 v2, s0
; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT:    s_endpgm
  %cmp = icmp eq i32 %in, 0
  %select = select i1 %cmp, double 0.0, double -1.0
  store double %select, ptr addrspace(1) %out, align 8
  ret void
}