; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck %s --check-prefixes=SI
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s --check-prefixes=VI
; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck %s --check-prefixes=EG

; Codegen test for the `fptosi` instruction with a float source and signed
; integer results of several widths (i1, i16, i32, i64) plus vector forms.
; Each kernel is compiled by the three llc invocations above; the expected
; output is matched under the SI prefix (amdgcn, no -mcpu), the VI prefix
; (amdgcn, -mcpu=tonga) and the EG prefix (r600, -mcpu=redwood).
; The assertion lines inside each function are generated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
declare float @llvm.fabs.f32(float) #1

; Scalar float -> i32. The float is a kernel argument and the result is stored
; to %out. The amdgcn checks expect a single v_cvt_i32_f32; the r600 checks
; expect TRUNC followed by FLT_TO_INT.
define amdgpu_kernel void @fp_to_sint_i32(ptr addrspace(1) %out, float %in) {
; SI-LABEL: fp_to_sint_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s6, s[4:5], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cvt_i32_f32_e32 v0, s6
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fp_to_sint_i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[4:5], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_cvt_i32_f32_e32 v0, s2
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
;
; EG-LABEL: fp_to_sint_i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 4:
; EG-NEXT: TRUNC * T0.W, KC0[2].Z,
; EG-NEXT: FLT_TO_INT T0.X, PV.W,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %conv = fptosi float %in to i32
  store i32 %conv, ptr addrspace(1) %out
  ret void
}

; Same as fp_to_sint_i32, but the input goes through llvm.fabs first. The
; expected output shows the absolute value as a source modifier on the
; conversion (|s6| / |s2| on amdgcn, |KC0[2].Z| on r600) with no separate
; instruction for the fabs.
define amdgpu_kernel void @fp_to_sint_i32_fabs(ptr addrspace(1) %out, float %in) {
; SI-LABEL: fp_to_sint_i32_fabs:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s6, s[4:5], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cvt_i32_f32_e64 v0, |s6|
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fp_to_sint_i32_fabs:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[4:5], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_cvt_i32_f32_e64 v0, |s2|
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
;
; EG-LABEL: fp_to_sint_i32_fabs:
; EG: ; %bb.0:
; EG-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 4:
; EG-NEXT: TRUNC * T0.W, |KC0[2].Z|,
; EG-NEXT: FLT_TO_INT T0.X, PV.W,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %in.fabs = call float @llvm.fabs.f32(float %in)
  %conv = fptosi float %in.fabs to i32
  store i32 %conv, ptr addrspace(1) %out
  ret void
}

; <2 x float> -> <2 x i32>, vector passed as a kernel argument. The checks
; expect one conversion per element and a single 2-dword store.
define amdgpu_kernel void @fp_to_sint_v2i32(ptr addrspace(1) %out, <2 x float> %in) {
; SI-LABEL: fp_to_sint_v2i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s4, s0
; SI-NEXT: s_mov_b32 s5, s1
; SI-NEXT: v_cvt_i32_f32_e32 v1, s3
; SI-NEXT: v_cvt_i32_f32_e32 v0, s2
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fp_to_sint_v2i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_cvt_i32_f32_e32 v1, s3
; VI-NEXT: v_cvt_i32_f32_e32 v0, s2
; VI-NEXT: s_mov_b32 s4, s0
; VI-NEXT: s_mov_b32 s5, s1
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; VI-NEXT: s_endpgm
;
; EG-LABEL: fp_to_sint_v2i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 4:
; EG-NEXT: TRUNC * T0.W, KC0[3].X,
; EG-NEXT: FLT_TO_INT T0.Y, PV.W,
; EG-NEXT: TRUNC * T0.W, KC0[2].W,
; EG-NEXT: FLT_TO_INT T0.X, PV.W,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %result = fptosi <2 x float> %in to <2 x i32>
  store <2 x i32> %result, ptr addrspace(1) %out
  ret void
}

; <4 x float> -> <4 x i32>. Unlike the other kernels here, the source vector is
; loaded from memory through the %in pointer instead of being passed by value.
define amdgpu_kernel void @fp_to_sint_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: fp_to_sint_v4i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cvt_i32_f32_e32 v3, s7
; SI-NEXT: v_cvt_i32_f32_e32 v2, s6
; SI-NEXT: v_cvt_i32_f32_e32 v1, s5
; SI-NEXT: v_cvt_i32_f32_e32 v0, s4
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fp_to_sint_v4i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_cvt_i32_f32_e32 v3, s7
; VI-NEXT: v_cvt_i32_f32_e32 v2, s6
; VI-NEXT: v_cvt_i32_f32_e32 v1, s5
; VI-NEXT: v_cvt_i32_f32_e32 v0, s4
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; VI-NEXT: s_endpgm
;
; EG-LABEL: fp_to_sint_v4i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 9, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: TRUNC T0.W, T0.W,
; EG-NEXT: TRUNC * T1.W, T0.Z,
; EG-NEXT: FLT_TO_INT * T0.W, PV.W,
; EG-NEXT: FLT_TO_INT T0.Z, T1.W,
; EG-NEXT: TRUNC * T1.W, T0.Y,
; EG-NEXT: FLT_TO_INT T0.Y, PV.W,
; EG-NEXT: TRUNC * T1.W, T0.X,
; EG-NEXT: FLT_TO_INT T0.X, PV.W,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %value = load <4 x float>, ptr addrspace(1) %in
  %result = fptosi <4 x float> %value to <4 x i32>
  store <4 x i32> %result, ptr addrspace(1) %out
  ret void
}

; Scalar float -> i64. The expected amdgcn output is a multi-instruction
; expansion (trunc, multiply, floor, fma, two unsigned conversions, then a
; xor/subtract sign fix-up) rather than a single conversion instruction. The
; constants 0x2f800000 and 0xcf800000 in the checks are the f32 encodings of
; 2^-32 and -(2^32). The expected r600 output is an integer-only expansion.
; Check that the compiler doesn't crash with a "cannot select" error
define amdgpu_kernel void @fp_to_sint_i64 (ptr addrspace(1) %out, float %in) {
; SI-LABEL: fp_to_sint_i64:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_load_dword s4, s[4:5], 0xb
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_mov_b32 s5, 0x2f800000
; SI-NEXT: s_mov_b32 s6, 0xcf800000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_trunc_f32_e32 v0, s4
; SI-NEXT: v_mul_f32_e64 v1, |v0|, s5
; SI-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; SI-NEXT: v_floor_f32_e32 v1, v1
; SI-NEXT: v_cvt_u32_f32_e32 v3, v1
; SI-NEXT: v_fma_f32 v0, v1, s6, |v0|
; SI-NEXT: v_cvt_u32_f32_e32 v0, v0
; SI-NEXT: v_xor_b32_e32 v1, v3, v2
; SI-NEXT: v_xor_b32_e32 v0, v0, v2
; SI-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; SI-NEXT: v_subb_u32_e32 v1, vcc, v1, v2, vcc
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fp_to_sint_i64:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dword s2, s[4:5], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT: s_mov_b32 s4, 0x2f800000
; VI-NEXT: s_mov_b32 s5, 0xcf800000
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_trunc_f32_e32 v0, s2
; VI-NEXT: v_mul_f32_e64 v1, |v0|, s4
; VI-NEXT: v_floor_f32_e32 v1, v1
; VI-NEXT: v_fma_f32 v2, v1, s5, |v0|
; VI-NEXT: v_cvt_u32_f32_e32 v2, v2
; VI-NEXT: v_cvt_u32_f32_e32 v1, v1
; VI-NEXT: v_ashrrev_i32_e32 v3, 31, v0
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: v_xor_b32_e32 v0, v2, v3
; VI-NEXT: v_xor_b32_e32 v1, v1, v3
; VI-NEXT: v_sub_u32_e32 v0, vcc, v0, v3
; VI-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
;
; EG-LABEL: fp_to_sint_i64:
; EG: ; %bb.0: ; %entry
; EG-NEXT: ALU 40, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 4:
; EG-NEXT: MOV * T0.W, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T0.W, KC0[2].Z, literal.x, PV.W,
; EG-NEXT: AND_INT * T1.W, KC0[2].Z, literal.y,
; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38)
; EG-NEXT: OR_INT T1.W, PS, literal.x,
; EG-NEXT: ADD_INT * T2.W, PV.W, literal.y,
; EG-NEXT: 8388608(1.175494e-38), -150(nan)
; EG-NEXT: ADD_INT T0.X, T0.W, literal.x,
; EG-NEXT: AND_INT T0.Y, PS, literal.y,
; EG-NEXT: SUB_INT T0.Z, literal.z, T0.W,
; EG-NEXT: NOT_INT T0.W, PS,
; EG-NEXT: LSHR * T3.W, PV.W, 1,
; EG-NEXT: -127(nan), 31(4.344025e-44)
; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00)
; EG-NEXT: BIT_ALIGN_INT T1.X, 0.0, PS, PV.W,
; EG-NEXT: AND_INT T1.Y, PV.Z, literal.x,
; EG-NEXT: BIT_ALIGN_INT T0.Z, 0.0, T1.W, PV.Z,
; EG-NEXT: LSHL T0.W, T1.W, PV.Y,
; EG-NEXT: AND_INT * T1.W, T2.W, literal.x,
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT: CNDE_INT T0.Y, PS, PV.W, 0.0,
; EG-NEXT: CNDE_INT T0.Z, PV.Y, PV.Z, 0.0,
; EG-NEXT: CNDE_INT T0.W, PS, PV.X, PV.W,
; EG-NEXT: SETGT_INT * T1.W, T0.X, literal.x,
; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
; EG-NEXT: CNDE_INT T1.Z, PS, 0.0, PV.W,
; EG-NEXT: CNDE_INT T0.W, PS, PV.Z, PV.Y,
; EG-NEXT: ASHR * T1.W, KC0[2].Z, literal.x,
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
; EG-NEXT: XOR_INT * T2.W, PV.Z, PS,
; EG-NEXT: SUB_INT T2.W, PS, T1.W,
; EG-NEXT: SUBB_UINT * T3.W, PV.W, T1.W,
; EG-NEXT: SUB_INT T2.W, PV.W, PS,
; EG-NEXT: SETGT_INT * T3.W, 0.0, T0.X,
; EG-NEXT: CNDE_INT T0.Y, PS, PV.W, 0.0,
; EG-NEXT: SUB_INT * T0.W, T0.W, T1.W,
; EG-NEXT: CNDE_INT T0.X, T3.W, PV.W, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
entry:
  %0 = fptosi float %in to i64
  store i64 %0, ptr addrspace(1) %out
  ret void
}

; <2 x float> -> <2 x i64>. The expected output repeats the i64 expansion from
; fp_to_sint_i64 once per element and writes the result with one 4-dword store.
define amdgpu_kernel void @fp_to_sint_v2i64(ptr addrspace(1) %out, <2 x float> %x) {
; SI-LABEL: fp_to_sint_v2i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_mov_b32 s8, 0x2f800000
; SI-NEXT: s_mov_b32 s9, 0xcf800000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s0, s4
; SI-NEXT: s_mov_b32 s1, s5
; SI-NEXT: v_trunc_f32_e32 v0, s7
; SI-NEXT: v_trunc_f32_e32 v1, s6
; SI-NEXT: v_mul_f32_e64 v2, |v0|, s8
; SI-NEXT: v_ashrrev_i32_e32 v3, 31, v0
; SI-NEXT: v_mul_f32_e64 v4, |v1|, s8
; SI-NEXT: v_ashrrev_i32_e32 v5, 31, v1
; SI-NEXT: v_floor_f32_e32 v2, v2
; SI-NEXT: v_floor_f32_e32 v4, v4
; SI-NEXT: v_cvt_u32_f32_e32 v6, v2
; SI-NEXT: v_fma_f32 v0, v2, s9, |v0|
; SI-NEXT: v_cvt_u32_f32_e32 v2, v4
; SI-NEXT: v_fma_f32 v1, v4, s9, |v1|
; SI-NEXT: v_cvt_u32_f32_e32 v0, v0
; SI-NEXT: v_xor_b32_e32 v4, v6, v3
; SI-NEXT: v_cvt_u32_f32_e32 v1, v1
; SI-NEXT: v_xor_b32_e32 v6, v2, v5
; SI-NEXT: v_xor_b32_e32 v0, v0, v3
; SI-NEXT: v_xor_b32_e32 v1, v1, v5
; SI-NEXT: v_sub_i32_e32 v2, vcc, v0, v3
; SI-NEXT: v_subb_u32_e32 v3, vcc, v4, v3, vcc
; SI-NEXT: v_sub_i32_e32 v0, vcc, v1, v5
; SI-NEXT: v_subb_u32_e32 v1, vcc, v6, v5, vcc
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fp_to_sint_v2i64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT: s_mov_b32 s8, 0x2f800000
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_trunc_f32_e32 v0, s3
; VI-NEXT: v_mul_f32_e64 v1, |v0|, s8
; VI-NEXT: s_mov_b32 s4, s0
; VI-NEXT: v_floor_f32_e32 v1, v1
; VI-NEXT: s_mov_b32 s0, 0xcf800000
; VI-NEXT: v_fma_f32 v2, v1, s0, |v0|
; VI-NEXT: v_trunc_f32_e32 v4, s2
; VI-NEXT: v_cvt_u32_f32_e32 v2, v2
; VI-NEXT: v_mul_f32_e64 v3, |v4|, s8
; VI-NEXT: v_cvt_u32_f32_e32 v1, v1
; VI-NEXT: v_floor_f32_e32 v3, v3
; VI-NEXT: v_cvt_u32_f32_e32 v5, v3
; VI-NEXT: v_fma_f32 v3, v3, s0, |v4|
; VI-NEXT: v_ashrrev_i32_e32 v0, 31, v0
; VI-NEXT: v_cvt_u32_f32_e32 v6, v3
; VI-NEXT: v_xor_b32_e32 v2, v2, v0
; VI-NEXT: v_xor_b32_e32 v1, v1, v0
; VI-NEXT: v_sub_u32_e32 v2, vcc, v2, v0
; VI-NEXT: v_subb_u32_e32 v3, vcc, v1, v0, vcc
; VI-NEXT: v_ashrrev_i32_e32 v1, 31, v4
; VI-NEXT: v_xor_b32_e32 v0, v6, v1
; VI-NEXT: v_xor_b32_e32 v4, v5, v1
; VI-NEXT: v_sub_u32_e32 v0, vcc, v0, v1
; VI-NEXT: s_mov_b32 s5, s1
; VI-NEXT: v_subb_u32_e32 v1, vcc, v4, v1, vcc
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT: s_endpgm
;
; EG-LABEL: fp_to_sint_v2i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 74, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 4:
; EG-NEXT: MOV * T0.W, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T0.Z, KC0[3].X, literal.x, PV.W,
; EG-NEXT: BFE_UINT T0.W, KC0[2].W, literal.x, PV.W,
; EG-NEXT: AND_INT * T1.Z, KC0[2].W, literal.y,
; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38)
; EG-NEXT: ADD_INT T1.W, PV.W, literal.x,
; EG-NEXT: ADD_INT * T2.W, PV.Z, literal.x,
; EG-NEXT: -150(nan), 0(0.000000e+00)
; EG-NEXT: AND_INT T0.X, PS, literal.x,
; EG-NEXT: AND_INT T0.Y, PV.W, literal.x,
; EG-NEXT: OR_INT T1.Z, T1.Z, literal.y,
; EG-NEXT: SUB_INT T3.W, literal.z, T0.W,
; EG-NEXT: AND_INT * T4.W, KC0[3].X, literal.w,
; EG-NEXT: 31(4.344025e-44), 8388608(1.175494e-38)
; EG-NEXT: 150(2.101948e-43), 8388607(1.175494e-38)
; EG-NEXT: OR_INT T1.X, PS, literal.x,
; EG-NEXT: AND_INT T1.Y, PV.W, literal.y,
; EG-NEXT: BIT_ALIGN_INT T2.Z, 0.0, PV.Z, PV.W,
; EG-NEXT: LSHL T3.W, PV.Z, PV.Y,
; EG-NEXT: AND_INT * T4.W, T1.W, literal.y,
; EG-NEXT: 8388608(1.175494e-38), 32(4.484155e-44)
; EG-NEXT: CNDE_INT T0.Y, PS, PV.W, 0.0,
; EG-NEXT: CNDE_INT T2.Z, PV.Y, PV.Z, 0.0,
; EG-NEXT: LSHL T5.W, PV.X, T0.X,
; EG-NEXT: AND_INT * T6.W, T2.W, literal.x,
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT: CNDE_INT T0.X, PS, PV.W, 0.0,
; EG-NEXT: NOT_INT T1.Y, T1.W,
; EG-NEXT: SUB_INT T3.Z, literal.x, T0.Z,
; EG-NEXT: NOT_INT T1.W, T2.W, BS:VEC_120/SCL_212
; EG-NEXT: LSHR * T2.W, T1.X, 1,
; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00)
; EG-NEXT: LSHR T2.X, T1.Z, 1,
; EG-NEXT: ADD_INT T2.Y, T0.Z, literal.x, BS:VEC_120/SCL_212
; EG-NEXT: BIT_ALIGN_INT T0.Z, 0.0, PS, PV.W,
; EG-NEXT: BIT_ALIGN_INT T1.W, 0.0, T1.X, PV.Z,
; EG-NEXT: AND_INT * T2.W, PV.Z, literal.y,
; EG-NEXT: -127(nan), 32(4.484155e-44)
; EG-NEXT: CNDE_INT T1.X, PS, PV.W, 0.0,
; EG-NEXT: CNDE_INT T3.Y, T6.W, PV.Z, T5.W, BS:VEC_021/SCL_122
; EG-NEXT: SETGT_INT T0.Z, PV.Y, literal.x,
; EG-NEXT: BIT_ALIGN_INT T1.W, 0.0, PV.X, T1.Y,
; EG-NEXT: ADD_INT * T0.W, T0.W, literal.y,
; EG-NEXT: 23(3.222986e-44), -127(nan)
; EG-NEXT: CNDE_INT T2.X, T4.W, PV.W, T3.W,
; EG-NEXT: SETGT_INT T1.Y, PS, literal.x,
; EG-NEXT: CNDE_INT T1.Z, PV.Z, 0.0, PV.Y,
; EG-NEXT: CNDE_INT T1.W, PV.Z, PV.X, T0.X,
; EG-NEXT: ASHR * T2.W, KC0[3].X, literal.y,
; EG-NEXT: 23(3.222986e-44), 31(4.344025e-44)
; EG-NEXT: XOR_INT T0.X, PV.W, PS,
; EG-NEXT: XOR_INT T3.Y, PV.Z, PS,
; EG-NEXT: CNDE_INT T0.Z, PV.Y, 0.0, PV.X,
; EG-NEXT: CNDE_INT T1.W, PV.Y, T2.Z, T0.Y,
; EG-NEXT: ASHR * T3.W, KC0[2].W, literal.x,
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: XOR_INT T0.Y, PV.W, PS,
; EG-NEXT: XOR_INT T0.Z, PV.Z, PS,
; EG-NEXT: SUB_INT T1.W, PV.Y, T2.W,
; EG-NEXT: SUBB_UINT * T4.W, PV.X, T2.W,
; EG-NEXT: SUB_INT T1.Y, PV.W, PS,
; EG-NEXT: SETGT_INT T1.Z, 0.0, T2.Y,
; EG-NEXT: SUB_INT T1.W, PV.Z, T3.W,
; EG-NEXT: SUBB_UINT * T4.W, PV.Y, T3.W,
; EG-NEXT: SUB_INT T0.Z, PV.W, PS,
; EG-NEXT: SETGT_INT T0.W, 0.0, T0.W,
; EG-NEXT: CNDE_INT * T1.W, PV.Z, PV.Y, 0.0,
; EG-NEXT: CNDE_INT T1.Y, PV.W, PV.Z, 0.0,
; EG-NEXT: SUB_INT * T2.W, T0.X, T2.W,
; EG-NEXT: CNDE_INT T1.Z, T1.Z, PV.W, 0.0,
; EG-NEXT: SUB_INT * T2.W, T0.Y, T3.W,
; EG-NEXT: CNDE_INT T1.X, T0.W, PV.W, 0.0,
; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %conv = fptosi <2 x float> %x to <2 x i64>
  store <2 x i64> %conv, ptr addrspace(1) %out
  ret void
}

; <4 x float> -> <4 x i64>. The 32-byte result is expected to be written with
; two 4-dword stores (offsets 0 and 16); the r600 output needs two ALU clauses.
define amdgpu_kernel void @fp_to_sint_v4i64(ptr addrspace(1) %out, <4 x float> %x) {
; SI-LABEL: fp_to_sint_v4i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0xd
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_mov_b32 s8, 0x2f800000
; SI-NEXT: s_mov_b32 s9, 0xcf800000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_trunc_f32_e32 v0, s5
; SI-NEXT: v_trunc_f32_e32 v1, s4
; SI-NEXT: v_trunc_f32_e32 v2, s7
; SI-NEXT: v_trunc_f32_e32 v3, s6
; SI-NEXT: v_mul_f32_e64 v4, |v0|, s8
; SI-NEXT: v_ashrrev_i32_e32 v5, 31, v0
; SI-NEXT: v_mul_f32_e64 v6, |v1|, s8
; SI-NEXT: v_ashrrev_i32_e32 v7, 31, v1
; SI-NEXT: v_mul_f32_e64 v8, |v2|, s8
; SI-NEXT: v_ashrrev_i32_e32 v9, 31, v2
; SI-NEXT: v_mul_f32_e64 v10, |v3|, s8
; SI-NEXT: v_ashrrev_i32_e32 v11, 31, v3
; SI-NEXT: v_floor_f32_e32 v4, v4
; SI-NEXT: v_floor_f32_e32 v6, v6
; SI-NEXT: v_floor_f32_e32 v8, v8
; SI-NEXT: v_floor_f32_e32 v10, v10
; SI-NEXT: v_cvt_u32_f32_e32 v12, v4
; SI-NEXT: v_fma_f32 v0, v4, s9, |v0|
; SI-NEXT: v_cvt_u32_f32_e32 v4, v6
; SI-NEXT: v_fma_f32 v1, v6, s9, |v1|
; SI-NEXT: v_cvt_u32_f32_e32 v6, v8
; SI-NEXT: v_fma_f32 v2, v8, s9, |v2|
; SI-NEXT: v_cvt_u32_f32_e32 v8, v10
; SI-NEXT: v_fma_f32 v3, v10, s9, |v3|
; SI-NEXT: v_cvt_u32_f32_e32 v0, v0
; SI-NEXT: v_xor_b32_e32 v10, v12, v5
; SI-NEXT: v_cvt_u32_f32_e32 v1, v1
; SI-NEXT: v_xor_b32_e32 v4, v4, v7
; SI-NEXT: v_cvt_u32_f32_e32 v2, v2
; SI-NEXT: v_xor_b32_e32 v12, v6, v9
; SI-NEXT: v_cvt_u32_f32_e32 v3, v3
; SI-NEXT: v_xor_b32_e32 v8, v8, v11
; SI-NEXT: v_xor_b32_e32 v0, v0, v5
; SI-NEXT: v_xor_b32_e32 v1, v1, v7
; SI-NEXT: v_xor_b32_e32 v6, v2, v9
; SI-NEXT: v_xor_b32_e32 v13, v3, v11
; SI-NEXT: v_sub_i32_e32 v2, vcc, v0, v5
; SI-NEXT: v_subb_u32_e32 v3, vcc, v10, v5, vcc
; SI-NEXT: v_sub_i32_e32 v0, vcc, v1, v7
; SI-NEXT: v_subb_u32_e32 v1, vcc, v4, v7, vcc
; SI-NEXT: v_sub_i32_e32 v6, vcc, v6, v9
; SI-NEXT: v_subb_u32_e32 v7, vcc, v12, v9, vcc
; SI-NEXT: v_sub_i32_e32 v4, vcc, v13, v11
; SI-NEXT: v_subb_u32_e32 v5, vcc, v8, v11, vcc
; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fp_to_sint_v4i64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT: s_mov_b32 s4, 0x2f800000
; VI-NEXT: s_mov_b32 s5, 0xcf800000
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_trunc_f32_e32 v0, s9
; VI-NEXT: v_mul_f32_e64 v1, |v0|, s4
; VI-NEXT: v_floor_f32_e32 v1, v1
; VI-NEXT: v_fma_f32 v2, v1, s5, |v0|
; VI-NEXT: v_cvt_u32_f32_e32 v2, v2
; VI-NEXT: v_trunc_f32_e32 v4, s8
; VI-NEXT: v_cvt_u32_f32_e32 v1, v1
; VI-NEXT: v_mul_f32_e64 v3, |v4|, s4
; VI-NEXT: v_floor_f32_e32 v3, v3
; VI-NEXT: v_ashrrev_i32_e32 v0, 31, v0
; VI-NEXT: v_cvt_u32_f32_e32 v5, v3
; VI-NEXT: v_fma_f32 v3, v3, s5, |v4|
; VI-NEXT: v_xor_b32_e32 v2, v2, v0
; VI-NEXT: v_cvt_u32_f32_e32 v6, v3
; VI-NEXT: v_xor_b32_e32 v1, v1, v0
; VI-NEXT: v_sub_u32_e32 v2, vcc, v2, v0
; VI-NEXT: v_subb_u32_e32 v3, vcc, v1, v0, vcc
; VI-NEXT: v_ashrrev_i32_e32 v1, 31, v4
; VI-NEXT: v_xor_b32_e32 v4, v5, v1
; VI-NEXT: v_trunc_f32_e32 v5, s11
; VI-NEXT: v_xor_b32_e32 v0, v6, v1
; VI-NEXT: v_mul_f32_e64 v6, |v5|, s4
; VI-NEXT: v_floor_f32_e32 v6, v6
; VI-NEXT: v_cvt_u32_f32_e32 v7, v6
; VI-NEXT: v_fma_f32 v6, v6, s5, |v5|
; VI-NEXT: v_cvt_u32_f32_e32 v6, v6
; VI-NEXT: v_sub_u32_e32 v0, vcc, v0, v1
; VI-NEXT: v_subb_u32_e32 v1, vcc, v4, v1, vcc
; VI-NEXT: v_ashrrev_i32_e32 v4, 31, v5
; VI-NEXT: v_trunc_f32_e32 v8, s10
; VI-NEXT: v_xor_b32_e32 v5, v6, v4
; VI-NEXT: v_mul_f32_e64 v6, |v8|, s4
; VI-NEXT: v_floor_f32_e32 v6, v6
; VI-NEXT: v_cvt_u32_f32_e32 v9, v6
; VI-NEXT: v_fma_f32 v6, v6, s5, |v8|
; VI-NEXT: v_cvt_u32_f32_e32 v10, v6
; VI-NEXT: v_xor_b32_e32 v7, v7, v4
; VI-NEXT: v_sub_u32_e32 v6, vcc, v5, v4
; VI-NEXT: v_ashrrev_i32_e32 v5, 31, v8
; VI-NEXT: v_subb_u32_e32 v7, vcc, v7, v4, vcc
; VI-NEXT: v_xor_b32_e32 v4, v10, v5
; VI-NEXT: v_xor_b32_e32 v8, v9, v5
; VI-NEXT: v_sub_u32_e32 v4, vcc, v4, v5
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: v_subb_u32_e32 v5, vcc, v8, v5, vcc
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; VI-NEXT: s_endpgm
;
; EG-LABEL: fp_to_sint_v4i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 99, @6, KC0[CB0:0-32], KC1[]
; EG-NEXT: ALU 54, @106, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T0.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 6:
; EG-NEXT: MOV * T0.W, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T1.W, KC0[3].Z, literal.x, PV.W,
; EG-NEXT: AND_INT * T2.W, KC0[3].Z, literal.y,
; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38)
; EG-NEXT: OR_INT T2.W, PS, literal.x,
; EG-NEXT: ADD_INT * T3.W, PV.W, literal.y,
; EG-NEXT: 8388608(1.175494e-38), -150(nan)
; EG-NEXT: ADD_INT T0.X, T1.W, literal.x,
; EG-NEXT: BFE_UINT T0.Y, KC0[4].X, literal.y, T0.W,
; EG-NEXT: AND_INT T0.Z, PS, literal.z,
; EG-NEXT: NOT_INT T4.W, PS,
; EG-NEXT: LSHR * T5.W, PV.W, 1,
; EG-NEXT: -127(nan), 23(3.222986e-44)
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: BIT_ALIGN_INT T1.X, 0.0, PS, PV.W,
; EG-NEXT: AND_INT T1.Y, T3.W, literal.x,
; EG-NEXT: LSHL T0.Z, T2.W, PV.Z, BS:VEC_120/SCL_212
; EG-NEXT: AND_INT T3.W, KC0[4].X, literal.y,
; EG-NEXT: ADD_INT * T4.W, PV.Y, literal.z,
; EG-NEXT: 32(4.484155e-44), 8388607(1.175494e-38)
; EG-NEXT: -150(nan), 0(0.000000e+00)
; EG-NEXT: AND_INT T2.Y, PS, literal.x,
; EG-NEXT: OR_INT T1.Z, PV.W, literal.y,
; EG-NEXT: CNDE_INT T3.W, PV.Y, PV.X, PV.Z,
; EG-NEXT: SETGT_INT * T5.W, T0.X, literal.z,
; EG-NEXT: 31(4.344025e-44), 8388608(1.175494e-38)
; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
; EG-NEXT: CNDE_INT T3.Y, PS, 0.0, PV.W,
; EG-NEXT: SUB_INT T2.Z, literal.x, T1.W,
; EG-NEXT: LSHL T1.W, PV.Z, PV.Y,
; EG-NEXT: AND_INT * T3.W, T4.W, literal.y,
; EG-NEXT: 150(2.101948e-43), 32(4.484155e-44)
; EG-NEXT: CNDE_INT T1.X, PS, PV.W, 0.0,
; EG-NEXT: AND_INT T2.Y, PV.Z, literal.x,
; EG-NEXT: SUB_INT T3.Z, literal.y, T0.Y,
; EG-NEXT: NOT_INT T4.W, T4.W,
; EG-NEXT: LSHR * T6.W, T1.Z, 1,
; EG-NEXT: 32(4.484155e-44), 150(2.101948e-43)
; EG-NEXT: BIT_ALIGN_INT T2.X, 0.0, T2.W, T2.Z,
; EG-NEXT: ADD_INT T0.Y, T0.Y, literal.x,
; EG-NEXT: BIT_ALIGN_INT T2.Z, 0.0, PS, PV.W,
; EG-NEXT: BIT_ALIGN_INT T2.W, 0.0, T1.Z, PV.Z,
; EG-NEXT: AND_INT * T4.W, PV.Z, literal.y,
; EG-NEXT: -127(nan), 32(4.484155e-44)
; EG-NEXT: CNDE_INT T3.X, PS, PV.W, 0.0,
; EG-NEXT: CNDE_INT T4.Y, T3.W, PV.Z, T1.W,
; EG-NEXT: SETGT_INT T1.Z, PV.Y, literal.x,
; EG-NEXT: CNDE_INT T1.W, T1.Y, T0.Z, 0.0,
; EG-NEXT: CNDE_INT * T2.W, T2.Y, PV.X, 0.0,
; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
; EG-NEXT: CNDE_INT T2.X, T5.W, PS, PV.W,
; EG-NEXT: ASHR T1.Y, KC0[3].Z, literal.x,
; EG-NEXT: CNDE_INT T0.Z, PV.Z, 0.0, PV.Y,
; EG-NEXT: CNDE_INT T1.W, PV.Z, PV.X, T1.X,
; EG-NEXT: ASHR * T2.W, KC0[4].X, literal.x,
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: XOR_INT T2.Y, PV.W, PS,
; EG-NEXT: XOR_INT T0.Z, PV.Z, PS,
; EG-NEXT: XOR_INT T1.W, PV.X, PV.Y,
; EG-NEXT: XOR_INT * T3.W, T3.Y, PV.Y,
; EG-NEXT: SUB_INT T3.Y, PS, T1.Y,
; EG-NEXT: SUBB_UINT T1.Z, PV.W, T1.Y,
; EG-NEXT: SUB_INT T3.W, PV.Z, T2.W,
; EG-NEXT: SUBB_UINT * T4.W, PV.Y, T2.W,
; EG-NEXT: SUB_INT T4.Y, PV.W, PS,
; EG-NEXT: SUB_INT T0.Z, PV.Y, PV.Z,
; EG-NEXT: BFE_UINT T3.W, KC0[3].Y, literal.x, T0.W,
; EG-NEXT: AND_INT * T4.W, KC0[3].Y, literal.y,
; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38)
; EG-NEXT: SETGT_INT T0.X, 0.0, T0.X,
; EG-NEXT: ADD_INT T3.Y, PV.W, literal.x,
; EG-NEXT: OR_INT T1.Z, PS, literal.y,
; EG-NEXT: BFE_UINT T0.W, KC0[3].W, literal.z, T0.W,
; EG-NEXT: ADD_INT * T4.W, PV.W, literal.w,
; EG-NEXT: -127(nan), 8388608(1.175494e-38)
; EG-NEXT: 23(3.222986e-44), -150(nan)
; EG-NEXT: AND_INT T1.X, KC0[3].W, literal.x,
; EG-NEXT: ADD_INT T5.Y, PV.W, literal.y,
; EG-NEXT: SUB_INT T2.Z, literal.z, T3.W,
; EG-NEXT: NOT_INT T3.W, PS,
; EG-NEXT: LSHR * T5.W, PV.Z, 1,
; EG-NEXT: 8388607(1.175494e-38), -150(nan)
; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00)
; EG-NEXT: BIT_ALIGN_INT T2.X, 0.0, PS, PV.W,
; EG-NEXT: AND_INT T6.Y, PV.Z, literal.x,
; EG-NEXT: AND_INT T3.Z, PV.Y, literal.y,
; EG-NEXT: OR_INT T3.W, PV.X, literal.z,
; EG-NEXT: AND_INT * T5.W, T4.W, literal.y,
; EG-NEXT: 32(4.484155e-44), 31(4.344025e-44)
; EG-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
; EG-NEXT: BIT_ALIGN_INT T1.X, 0.0, T1.Z, T2.Z,
; EG-NEXT: LSHL T7.Y, T1.Z, PS,
; EG-NEXT: AND_INT T1.Z, T4.W, literal.x,
; EG-NEXT: LSHL T4.W, PV.W, PV.Z,
; EG-NEXT: AND_INT * T5.W, T5.Y, literal.x,
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT: CNDE_INT T3.X, PS, PV.W, 0.0,
; EG-NEXT: CNDE_INT T8.Y, PV.Z, PV.Y, 0.0,
; EG-NEXT: CNDE_INT * T2.Z, T6.Y, PV.X, 0.0,
; EG-NEXT: ALU clause starting at 106:
; EG-NEXT: CNDE_INT T6.W, T1.Z, T2.X, T7.Y, BS:VEC_021/SCL_122
; EG-NEXT: SETGT_INT * T7.W, T3.Y, literal.x,
; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
; EG-NEXT: CNDE_INT T1.X, PS, 0.0, PV.W,
; EG-NEXT: CNDE_INT T6.Y, PS, T2.Z, T8.Y,
; EG-NEXT: SUB_INT T1.Z, literal.x, T0.W,
; EG-NEXT: NOT_INT T6.W, T5.Y,
; EG-NEXT: LSHR * T7.W, T3.W, 1,
; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00)
; EG-NEXT: ASHR T2.X, KC0[3].Y, literal.x,
; EG-NEXT: ADD_INT T5.Y, T0.W, literal.y,
; EG-NEXT: BIT_ALIGN_INT T2.Z, 0.0, PS, PV.W,
; EG-NEXT: BIT_ALIGN_INT T0.W, 0.0, T3.W, PV.Z,
; EG-NEXT: AND_INT * T3.W, PV.Z, literal.z,
; EG-NEXT: 31(4.344025e-44), -127(nan)
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT: CNDE_INT T4.X, PS, PV.W, 0.0,
; EG-NEXT: CNDE_INT T7.Y, T5.W, PV.Z, T4.W,
; EG-NEXT: SETGT_INT T1.Z, PV.Y, literal.x,
; EG-NEXT: XOR_INT T0.W, T6.Y, PV.X,
; EG-NEXT: XOR_INT * T3.W, T1.X, PV.X,
; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
; EG-NEXT: SUB_INT T1.X, PS, T2.X,
; EG-NEXT: SUBB_UINT T6.Y, PV.W, T2.X,
; EG-NEXT: CNDE_INT T2.Z, PV.Z, 0.0, PV.Y,
; EG-NEXT: CNDE_INT T3.W, PV.Z, PV.X, T3.X,
; EG-NEXT: ASHR * T4.W, KC0[3].W, literal.x,
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: XOR_INT T3.X, PV.W, PS,
; EG-NEXT: XOR_INT T7.Y, PV.Z, PS,
; EG-NEXT: SUB_INT T1.Z, PV.X, PV.Y,
; EG-NEXT: SETGT_INT T3.W, 0.0, T3.Y,
; EG-NEXT: CNDE_INT * T6.W, T0.X, T0.Z, 0.0,
; EG-NEXT: SETGT_INT T1.X, 0.0, T0.Y,
; EG-NEXT: CNDE_INT T6.Y, PV.W, PV.Z, 0.0,
; EG-NEXT: SUB_INT T0.Z, T1.W, T1.Y, BS:VEC_021/SCL_122
; EG-NEXT: SUB_INT T1.W, PV.Y, T4.W,
; EG-NEXT: SUBB_UINT * T5.W, PV.X, T4.W,
; EG-NEXT: SUB_INT T4.X, PV.W, PS,
; EG-NEXT: SETGT_INT T0.Y, 0.0, T5.Y, BS:VEC_021/SCL_122
; EG-NEXT: CNDE_INT T6.Z, T0.X, PV.Z, 0.0,
; EG-NEXT: SUB_INT T0.W, T0.W, T2.X,
; EG-NEXT: CNDE_INT * T1.W, PV.X, T4.Y, 0.0,
; EG-NEXT: CNDE_INT T6.X, T3.W, PV.W, 0.0,
; EG-NEXT: CNDE_INT T1.Y, PV.Y, PV.X, 0.0,
; EG-NEXT: SUB_INT T0.W, T2.Y, T2.W,
; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: CNDE_INT T1.Z, T1.X, PV.W, 0.0,
; EG-NEXT: SUB_INT * T0.W, T3.X, T4.W, BS:VEC_120/SCL_212
; EG-NEXT: CNDE_INT T1.X, T0.Y, PV.W, 0.0,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: LSHR * T2.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %conv = fptosi <4 x float> %x to <4 x i64>
  store <4 x i64> %conv, ptr addrspace(1) %out
  ret void
}

; Scalar float -> i1 using fptosi.
; NOTE(review): the function name says "fp_to_uint" but the body performs a
; signed conversion (fptosi); the name looks like a copy/paste leftover and is
; kept because the generated labels depend on it.
; The expected output is a compare of the input against -1.0 followed by a
; select of 0/1 and a byte store.
define amdgpu_kernel void @fp_to_uint_f32_to_i1(ptr addrspace(1) %out, float %in) #0 {
; SI-LABEL: fp_to_uint_f32_to_i1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s6, s[4:5], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cmp_eq_f32_e64 s[4:5], -1.0, s6
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fp_to_uint_f32_to_i1:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s6, s[4:5], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_cmp_eq_f32_e64 s[4:5], -1.0, s6
; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; VI-NEXT: buffer_store_byte v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
;
; EG-LABEL: fp_to_uint_f32_to_i1:
; EG: ; %bb.0:
; EG-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 4:
; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
; EG-NEXT: SETE_DX10 * T1.W, KC0[2].Z, literal.y,
; EG-NEXT: 3(4.203895e-45), -1082130432(-1.000000e+00)
; EG-NEXT: AND_INT T1.W, PS, 1,
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: LSHL T0.X, PV.W, PS,
; EG-NEXT: LSHL * T0.W, literal.x, PS,
; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
; EG-NEXT: MOV T0.Y, 0.0,
; EG-NEXT: MOV * T0.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %conv = fptosi float %in to i1
  store i1 %conv, ptr addrspace(1) %out
  ret void
}

; Same as fp_to_uint_f32_to_i1 with llvm.fabs applied to the input first.
; NOTE(review): despite the "fp_to_uint" name this also uses fptosi.
; The expected compare against -1.0 carries the absolute value as a source
; modifier (|s6| on amdgcn, |KC0[2].Z| on r600).
define amdgpu_kernel void @fp_to_uint_fabs_f32_to_i1(ptr addrspace(1) %out, float %in) #0 {
; SI-LABEL: fp_to_uint_fabs_f32_to_i1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s6, s[4:5], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cmp_eq_f32_e64 s[4:5], -1.0, |s6|
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fp_to_uint_fabs_f32_to_i1:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s6, s[4:5], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_cmp_eq_f32_e64 s[4:5], -1.0, |s6|
; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; VI-NEXT: buffer_store_byte v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
;
; EG-LABEL: fp_to_uint_fabs_f32_to_i1:
; EG: ; %bb.0:
; EG-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 4:
; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
; EG-NEXT: SETE_DX10 * T1.W, |KC0[2].Z|, literal.y,
; EG-NEXT: 3(4.203895e-45), -1082130432(-1.000000e+00)
; EG-NEXT: AND_INT T1.W, PS, 1,
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: LSHL T0.X, PV.W, PS,
; EG-NEXT: LSHL * T0.W, literal.x, PS,
; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
; EG-NEXT: MOV T0.Y, 0.0,
; EG-NEXT: MOV * T0.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %in.fabs = call float @llvm.fabs.f32(float %in)
  %conv = fptosi float %in.fabs to i1
  store i1 %conv, ptr addrspace(1) %out
  ret void
}

; Scalar float -> i16. The amdgcn checks expect the same 32-bit conversion as
; fp_to_sint_i32 followed by a 16-bit store (buffer_store_short); the r600
; checks mask the converted value with 65535 before a masked memory write.
define amdgpu_kernel void @fp_to_sint_f32_i16(ptr addrspace(1) %out, float %in) #0 {
; SI-LABEL: fp_to_sint_f32_i16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s6, s[4:5], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cvt_i32_f32_e32 v0, s6
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fp_to_sint_f32_i16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[4:5], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_cvt_i32_f32_e32 v0, s2
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
;
; EG-LABEL: fp_to_sint_f32_i16:
; EG: ; %bb.0:
; EG-NEXT: ALU 13, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 4:
; EG-NEXT: TRUNC T0.W, KC0[2].Z,
; EG-NEXT: AND_INT * T1.W, KC0[2].Y, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: FLT_TO_INT * T0.W, PV.W,
; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
; EG-NEXT: LSHL * T1.W, T1.W, literal.y,
; EG-NEXT: 65535(9.183409e-41), 3(4.203895e-45)
; EG-NEXT: LSHL T0.X, PV.W, PS,
; EG-NEXT: LSHL * T0.W, literal.x, PS,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
; EG-NEXT: MOV T0.Y, 0.0,
; EG-NEXT: MOV * T0.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %sint = fptosi float %in to i16
  store i16 %sint, ptr addrspace(1) %out
  ret void
}

; Attribute group #0 is used by the i1 and i16 kernels; #1 by the llvm.fabs
; declaration.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }