; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck %s -check-prefixes=SI
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s -check-prefixes=VI
; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck %s -check-prefixes=EG

; Codegen tests for fptoui from f32 (scalar and vector) to i1, i16, i32 and
; i64 results. Each llc invocation above is checked under its own prefix
; (SI, VI or EG). The assertion blocks inside the functions are autogenerated;
; regenerate them with utils/update_llc_test_checks.py instead of editing them
; by hand.

declare float @llvm.fabs.f32(float) #1

; Scalar f32 -> i32. SI and VI expect a single v_cvt_u32_f32; EG expects TRUNC
; followed by FLT_TO_UINT.
define amdgpu_kernel void @fp_to_uint_f32_to_i32(ptr addrspace(1) %out, float %in) {
; SI-LABEL: fp_to_uint_f32_to_i32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s6, s[4:5], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cvt_u32_f32_e32 v0, s6
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: fp_to_uint_f32_to_i32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cvt_u32_f32_e32 v0, s2
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
;
; EG-LABEL: fp_to_uint_f32_to_i32:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 3, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    ALU clause starting at 4:
; EG-NEXT:    TRUNC * T0.W, KC0[2].Z,
; EG-NEXT:    LSHR T0.X, KC0[2].Y, literal.x,
; EG-NEXT:    FLT_TO_UINT * T1.X, PV.W,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %conv = fptoui float %in to i32
  store i32 %conv, ptr addrspace(1) %out
  ret void
}

; <2 x float> kernel argument -> <2 x i32>; one conversion per element.
define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i32(ptr addrspace(1) %out, <2 x float> %in) {
; SI-LABEL: fp_to_uint_v2f32_to_v2i32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    v_cvt_u32_f32_e32 v1, s3
; SI-NEXT:    v_cvt_u32_f32_e32 v0, s2
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: fp_to_uint_v2f32_to_v2i32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cvt_u32_f32_e32 v1, s3
; VI-NEXT:    v_cvt_u32_f32_e32 v0, s2
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; VI-NEXT:    s_endpgm
;
; EG-LABEL: fp_to_uint_v2f32_to_v2i32:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 5, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    ALU clause starting at 4:
; EG-NEXT:    TRUNC T0.W, KC0[3].X,
; EG-NEXT:    TRUNC * T1.W, KC0[2].W,
; EG-NEXT:    FLT_TO_UINT * T0.Y, PV.W,
; EG-NEXT:    LSHR T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    FLT_TO_UINT * T0.X, T1.W,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %result = fptoui <2 x float> %in to <2 x i32>
  store <2 x i32> %result, ptr addrspace(1) %out
  ret void
}

; <4 x float> loaded from %in -> <4 x i32>; one conversion per element.
define amdgpu_kernel void @fp_to_uint_v4f32_to_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; SI-LABEL: fp_to_uint_v4f32_to_v4i32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cvt_u32_f32_e32 v3, s7
; SI-NEXT:    v_cvt_u32_f32_e32 v2, s6
; SI-NEXT:    v_cvt_u32_f32_e32 v1, s5
; SI-NEXT:    v_cvt_u32_f32_e32 v0, s4
; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: fp_to_uint_v4f32_to_v4i32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cvt_u32_f32_e32 v3, s7
; VI-NEXT:    v_cvt_u32_f32_e32 v2, s6
; VI-NEXT:    v_cvt_u32_f32_e32 v1, s5
; VI-NEXT:    v_cvt_u32_f32_e32 v0, s4
; VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; VI-NEXT:    s_endpgm
;
; EG-LABEL: fp_to_uint_v4f32_to_v4i32:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 9, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:    VTX_READ_128 T0.XYZW, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:    MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:    TRUNC T0.W, T0.W,
; EG-NEXT:    TRUNC * T1.W, T0.Z,
; EG-NEXT:    FLT_TO_UINT * T0.W, PV.W,
; EG-NEXT:    TRUNC T2.W, T0.Y,
; EG-NEXT:    FLT_TO_UINT * T0.Z, T1.W,
; EG-NEXT:    TRUNC T1.W, T0.X,
; EG-NEXT:    FLT_TO_UINT * T0.Y, PV.W,
; EG-NEXT:    LSHR T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    FLT_TO_UINT * T0.X, PV.W,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %value = load <4 x float>, ptr addrspace(1) %in
  %result = fptoui <4 x float> %value to <4 x i32>
  store <4 x i32> %result, ptr addrspace(1) %out
  ret void
}

; Scalar f32 -> i64. SI and VI expect an expansion: trunc, multiply by
; 0x2f800000 (2^-32 as f32), floor, fma with 0xcf800000 (-2^32 as f32), and one
; v_cvt_u32_f32 for each 32-bit half. EG expects a sequence built from integer
; ALU operations.
define amdgpu_kernel void @fp_to_uint_f32_to_i64(ptr addrspace(1) %out, float %x) {
; SI-LABEL: fp_to_uint_f32_to_i64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_load_dword s4, s[4:5], 0xb
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_mov_b32 s5, 0xcf800000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_trunc_f32_e32 v0, s4
; SI-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
; SI-NEXT:    v_floor_f32_e32 v2, v1
; SI-NEXT:    v_cvt_u32_f32_e32 v1, v2
; SI-NEXT:    v_fma_f32 v0, v2, s5, v0
; SI-NEXT:    v_cvt_u32_f32_e32 v0, v0
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: fp_to_uint_f32_to_i64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xcf800000
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_trunc_f32_e32 v0, s2
; VI-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
; VI-NEXT:    v_floor_f32_e32 v2, v1
; VI-NEXT:    v_fma_f32 v0, v2, s3, v0
; VI-NEXT:    v_cvt_u32_f32_e32 v1, v2
; VI-NEXT:    v_cvt_u32_f32_e32 v0, v0
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
;
; EG-LABEL: fp_to_uint_f32_to_i64:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 40, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    ALU clause starting at 4:
; EG-NEXT:    MOV * T0.W, literal.x,
; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
; EG-NEXT:    BFE_UINT T0.W, KC0[2].Z, literal.x, PV.W,
; EG-NEXT:    AND_INT * T1.W, KC0[2].Z, literal.y,
; EG-NEXT:    23(3.222986e-44), 8388607(1.175494e-38)
; EG-NEXT:    OR_INT T1.W, PS, literal.x,
; EG-NEXT:    ADD_INT * T2.W, PV.W, literal.y,
; EG-NEXT:    8388608(1.175494e-38), -150(nan)
; EG-NEXT:    ADD_INT T0.X, T0.W, literal.x,
; EG-NEXT:    AND_INT T0.Y, PS, literal.y,
; EG-NEXT:    SUB_INT T0.Z, literal.z, T0.W,
; EG-NEXT:    NOT_INT T0.W, PS,
; EG-NEXT:    LSHR * T3.W, PV.W, 1,
; EG-NEXT:    -127(nan), 31(4.344025e-44)
; EG-NEXT:    150(2.101948e-43), 0(0.000000e+00)
; EG-NEXT:    BIT_ALIGN_INT T1.X, 0.0, PS, PV.W,
; EG-NEXT:    AND_INT T1.Y, PV.Z, literal.x,
; EG-NEXT:    BIT_ALIGN_INT T0.Z, 0.0, T1.W, PV.Z,
; EG-NEXT:    LSHL T0.W, T1.W, PV.Y,
; EG-NEXT:    AND_INT * T1.W, T2.W, literal.x,
; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT:    CNDE_INT T0.Y, PS, PV.W, 0.0,
; EG-NEXT:    CNDE_INT T0.Z, PV.Y, PV.Z, 0.0,
; EG-NEXT:    CNDE_INT T0.W, PS, PV.X, PV.W,
; EG-NEXT:    SETGT_INT * T1.W, T0.X, literal.x,
; EG-NEXT:    23(3.222986e-44), 0(0.000000e+00)
; EG-NEXT:    CNDE_INT T1.Z, PS, 0.0, PV.W,
; EG-NEXT:    CNDE_INT T0.W, PS, PV.Z, PV.Y,
; EG-NEXT:    ASHR * T1.W, KC0[2].Z, literal.x,
; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT:    XOR_INT T0.W, PV.W, PS,
; EG-NEXT:    XOR_INT * T2.W, PV.Z, PS,
; EG-NEXT:    SUB_INT T2.W, PS, T1.W,
; EG-NEXT:    SUBB_UINT * T3.W, PV.W, T1.W,
; EG-NEXT:    SUB_INT T2.W, PV.W, PS,
; EG-NEXT:    SETGT_INT * T3.W, 0.0, T0.X,
; EG-NEXT:    CNDE_INT T0.Y, PS, PV.W, 0.0,
; EG-NEXT:    SUB_INT * T0.W, T0.W, T1.W,
; EG-NEXT:    CNDE_INT T0.X, T3.W, PV.W, 0.0,
; EG-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %conv = fptoui float %x to i64
  store i64 %conv, ptr addrspace(1) %out
  ret void
}

; <2 x float> -> <2 x i64>; the i64 expansion is applied to each element.
define amdgpu_kernel void @fp_to_uint_v2f32_to_v2i64(ptr addrspace(1) %out, <2 x float> %x) {
; SI-LABEL: fp_to_uint_v2f32_to_v2i64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s8, 0xcf800000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    v_trunc_f32_e32 v0, s3
; SI-NEXT:    v_trunc_f32_e32 v2, s2
; SI-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
; SI-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
; SI-NEXT:    v_floor_f32_e32 v4, v1
; SI-NEXT:    v_floor_f32_e32 v5, v3
; SI-NEXT:    v_cvt_u32_f32_e32 v3, v4
; SI-NEXT:    v_cvt_u32_f32_e32 v1, v5
; SI-NEXT:    v_fma_f32 v0, v4, s8, v0
; SI-NEXT:    v_fma_f32 v4, v5, s8, v2
; SI-NEXT:    v_cvt_u32_f32_e32 v2, v0
; SI-NEXT:    v_cvt_u32_f32_e32 v0, v4
; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: fp_to_uint_v2f32_to_v2i64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_trunc_f32_e32 v0, s3
; VI-NEXT:    v_trunc_f32_e32 v4, s2
; VI-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
; VI-NEXT:    v_mul_f32_e32 v2, 0x2f800000, v4
; VI-NEXT:    v_floor_f32_e32 v5, v1
; VI-NEXT:    s_mov_b32 s2, 0xcf800000
; VI-NEXT:    v_floor_f32_e32 v6, v2
; VI-NEXT:    v_fma_f32 v0, v5, s2, v0
; VI-NEXT:    v_cvt_u32_f32_e32 v2, v0
; VI-NEXT:    v_fma_f32 v0, v6, s2, v4
; VI-NEXT:    v_cvt_u32_f32_e32 v3, v5
; VI-NEXT:    v_cvt_u32_f32_e32 v1, v6
; VI-NEXT:    v_cvt_u32_f32_e32 v0, v0
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT:    s_endpgm
;
; EG-LABEL: fp_to_uint_v2f32_to_v2i64:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 74, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    ALU clause starting at 4:
; EG-NEXT:    MOV * T0.W, literal.x,
; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
; EG-NEXT:    BFE_UINT T0.Z, KC0[3].X, literal.x, PV.W,
; EG-NEXT:    BFE_UINT T0.W, KC0[2].W, literal.x, PV.W,
; EG-NEXT:    AND_INT * T1.Z, KC0[2].W, literal.y,
; EG-NEXT:    23(3.222986e-44), 8388607(1.175494e-38)
; EG-NEXT:    ADD_INT T1.W, PV.W, literal.x,
; EG-NEXT:    ADD_INT * T2.W, PV.Z, literal.x,
; EG-NEXT:    -150(nan), 0(0.000000e+00)
; EG-NEXT:    AND_INT T0.X, PS, literal.x,
; EG-NEXT:    AND_INT T0.Y, PV.W, literal.x,
; EG-NEXT:    OR_INT T1.Z, T1.Z, literal.y,
; EG-NEXT:    SUB_INT T3.W, literal.z, T0.W,
; EG-NEXT:    AND_INT * T4.W, KC0[3].X, literal.w,
; EG-NEXT:    31(4.344025e-44), 8388608(1.175494e-38)
; EG-NEXT:    150(2.101948e-43), 8388607(1.175494e-38)
; EG-NEXT:    OR_INT T1.X, PS, literal.x,
; EG-NEXT:    AND_INT T1.Y, PV.W, literal.y,
; EG-NEXT:    BIT_ALIGN_INT T2.Z, 0.0, PV.Z, PV.W,
; EG-NEXT:    LSHL T3.W, PV.Z, PV.Y,
; EG-NEXT:    AND_INT * T4.W, T1.W, literal.y,
; EG-NEXT:    8388608(1.175494e-38), 32(4.484155e-44)
; EG-NEXT:    CNDE_INT T0.Y, PS, PV.W, 0.0,
; EG-NEXT:    CNDE_INT T2.Z, PV.Y, PV.Z, 0.0,
; EG-NEXT:    LSHL T5.W, PV.X, T0.X,
; EG-NEXT:    AND_INT * T6.W, T2.W, literal.x,
; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT:    CNDE_INT T0.X, PS, PV.W, 0.0,
; EG-NEXT:    NOT_INT T1.Y, T1.W,
; EG-NEXT:    SUB_INT T3.Z, literal.x, T0.Z,
; EG-NEXT:    NOT_INT T1.W, T2.W, BS:VEC_120/SCL_212
; EG-NEXT:    LSHR * T2.W, T1.X, 1,
; EG-NEXT:    150(2.101948e-43), 0(0.000000e+00)
; EG-NEXT:    LSHR T2.X, T1.Z, 1,
; EG-NEXT:    ADD_INT T2.Y, T0.Z, literal.x, BS:VEC_120/SCL_212
; EG-NEXT:    BIT_ALIGN_INT T0.Z, 0.0, PS, PV.W,
; EG-NEXT:    BIT_ALIGN_INT T1.W, 0.0, T1.X, PV.Z,
; EG-NEXT:    AND_INT * T2.W, PV.Z, literal.y,
; EG-NEXT:    -127(nan), 32(4.484155e-44)
; EG-NEXT:    CNDE_INT T1.X, PS, PV.W, 0.0,
; EG-NEXT:    CNDE_INT T3.Y, T6.W, PV.Z, T5.W, BS:VEC_021/SCL_122
; EG-NEXT:    SETGT_INT T0.Z, PV.Y, literal.x,
; EG-NEXT:    BIT_ALIGN_INT T1.W, 0.0, PV.X, T1.Y,
; EG-NEXT:    ADD_INT * T0.W, T0.W, literal.y,
; EG-NEXT:    23(3.222986e-44), -127(nan)
; EG-NEXT:    CNDE_INT T2.X, T4.W, PV.W, T3.W,
; EG-NEXT:    SETGT_INT T1.Y, PS, literal.x,
; EG-NEXT:    CNDE_INT T1.Z, PV.Z, 0.0, PV.Y,
; EG-NEXT:    CNDE_INT T1.W, PV.Z, PV.X, T0.X,
; EG-NEXT:    ASHR * T2.W, KC0[3].X, literal.y,
; EG-NEXT:    23(3.222986e-44), 31(4.344025e-44)
; EG-NEXT:    XOR_INT T0.X, PV.W, PS,
; EG-NEXT:    XOR_INT T3.Y, PV.Z, PS,
; EG-NEXT:    CNDE_INT T0.Z, PV.Y, 0.0, PV.X,
; EG-NEXT:    CNDE_INT T1.W, PV.Y, T2.Z, T0.Y,
; EG-NEXT:    ASHR * T3.W, KC0[2].W, literal.x,
; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT:    XOR_INT T0.Y, PV.W, PS,
; EG-NEXT:    XOR_INT T0.Z, PV.Z, PS,
; EG-NEXT:    SUB_INT T1.W, PV.Y, T2.W,
; EG-NEXT:    SUBB_UINT * T4.W, PV.X, T2.W,
; EG-NEXT:    SUB_INT T1.Y, PV.W, PS,
; EG-NEXT:    SETGT_INT T1.Z, 0.0, T2.Y,
; EG-NEXT:    SUB_INT T1.W, PV.Z, T3.W,
; EG-NEXT:    SUBB_UINT * T4.W, PV.Y, T3.W,
; EG-NEXT:    SUB_INT T0.Z, PV.W, PS,
; EG-NEXT:    SETGT_INT T0.W, 0.0, T0.W,
; EG-NEXT:    CNDE_INT * T1.W, PV.Z, PV.Y, 0.0,
; EG-NEXT:    CNDE_INT T1.Y, PV.W, PV.Z, 0.0,
; EG-NEXT:    SUB_INT * T2.W, T0.X, T2.W,
; EG-NEXT:    CNDE_INT T1.Z, T1.Z, PV.W, 0.0,
; EG-NEXT:    SUB_INT * T2.W, T0.Y, T3.W,
; EG-NEXT:    CNDE_INT T1.X, T0.W, PV.W, 0.0,
; EG-NEXT:    LSHR * T0.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %conv = fptoui <2 x float> %x to <2 x i64>
  store <2 x i64> %conv, ptr addrspace(1) %out
  ret void
}

; <4 x float> -> <4 x i64>; the result is written with two 128-bit stores.
define amdgpu_kernel void @fp_to_uint_v4f32_to_v4i64(ptr addrspace(1) %out, <4 x float> %x) {
; SI-LABEL: fp_to_uint_v4f32_to_v4i64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0xd
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_mov_b32 s8, 0xcf800000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_trunc_f32_e32 v0, s5
; SI-NEXT:    v_trunc_f32_e32 v2, s4
; SI-NEXT:    v_trunc_f32_e32 v4, s7
; SI-NEXT:    v_trunc_f32_e32 v6, s6
; SI-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
; SI-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
; SI-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
; SI-NEXT:    v_mul_f32_e32 v7, 0x2f800000, v6
; SI-NEXT:    v_floor_f32_e32 v8, v1
; SI-NEXT:    v_floor_f32_e32 v9, v3
; SI-NEXT:    v_floor_f32_e32 v10, v5
; SI-NEXT:    v_floor_f32_e32 v11, v7
; SI-NEXT:    v_cvt_u32_f32_e32 v3, v8
; SI-NEXT:    v_cvt_u32_f32_e32 v1, v9
; SI-NEXT:    v_fma_f32 v0, v8, s8, v0
; SI-NEXT:    v_fma_f32 v8, v9, s8, v2
; SI-NEXT:    v_cvt_u32_f32_e32 v7, v10
; SI-NEXT:    v_cvt_u32_f32_e32 v5, v11
; SI-NEXT:    v_fma_f32 v4, v10, s8, v4
; SI-NEXT:    v_fma_f32 v9, v11, s8, v6
; SI-NEXT:    v_cvt_u32_f32_e32 v2, v0
; SI-NEXT:    v_cvt_u32_f32_e32 v0, v8
; SI-NEXT:    v_cvt_u32_f32_e32 v6, v4
; SI-NEXT:    v_cvt_u32_f32_e32 v4, v9
; SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: fp_to_uint_v4f32_to_v4i64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x34
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s2, 0xcf800000
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_trunc_f32_e32 v0, s9
; VI-NEXT:    v_trunc_f32_e32 v4, s8
; VI-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
; VI-NEXT:    v_mul_f32_e32 v2, 0x2f800000, v4
; VI-NEXT:    v_floor_f32_e32 v5, v1
; VI-NEXT:    v_floor_f32_e32 v6, v2
; VI-NEXT:    v_fma_f32 v0, v5, s2, v0
; VI-NEXT:    v_cvt_u32_f32_e32 v2, v0
; VI-NEXT:    v_fma_f32 v0, v6, s2, v4
; VI-NEXT:    v_trunc_f32_e32 v4, s11
; VI-NEXT:    v_cvt_u32_f32_e32 v3, v5
; VI-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
; VI-NEXT:    v_trunc_f32_e32 v8, s10
; VI-NEXT:    v_cvt_u32_f32_e32 v1, v6
; VI-NEXT:    v_floor_f32_e32 v6, v5
; VI-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v8
; VI-NEXT:    v_floor_f32_e32 v9, v5
; VI-NEXT:    v_fma_f32 v4, v6, s2, v4
; VI-NEXT:    v_cvt_u32_f32_e32 v7, v6
; VI-NEXT:    v_cvt_u32_f32_e32 v6, v4
; VI-NEXT:    v_fma_f32 v4, v9, s2, v8
; VI-NEXT:    v_cvt_u32_f32_e32 v5, v9
; VI-NEXT:    v_cvt_u32_f32_e32 v4, v4
; VI-NEXT:    v_cvt_u32_f32_e32 v0, v0
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; VI-NEXT:    s_endpgm
;
; EG-LABEL: fp_to_uint_v4f32_to_v4i64:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 99, @6, KC0[CB0:0-32], KC1[]
; EG-NEXT:    ALU 54, @106, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T0.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    ALU clause starting at 6:
; EG-NEXT:    MOV * T0.W, literal.x,
; EG-NEXT:    8(1.121039e-44), 0(0.000000e+00)
; EG-NEXT:    BFE_UINT T1.W, KC0[3].Z, literal.x, PV.W,
; EG-NEXT:    AND_INT * T2.W, KC0[3].Z, literal.y,
; EG-NEXT:    23(3.222986e-44), 8388607(1.175494e-38)
; EG-NEXT:    OR_INT T2.W, PS, literal.x,
; EG-NEXT:    ADD_INT * T3.W, PV.W, literal.y,
; EG-NEXT:    8388608(1.175494e-38), -150(nan)
; EG-NEXT:    ADD_INT T0.X, T1.W, literal.x,
; EG-NEXT:    BFE_UINT T0.Y, KC0[4].X, literal.y, T0.W,
; EG-NEXT:    AND_INT T0.Z, PS, literal.z,
; EG-NEXT:    NOT_INT T4.W, PS,
; EG-NEXT:    LSHR * T5.W, PV.W, 1,
; EG-NEXT:    -127(nan), 23(3.222986e-44)
; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT:    BIT_ALIGN_INT T1.X, 0.0, PS, PV.W,
; EG-NEXT:    AND_INT T1.Y, T3.W, literal.x,
; EG-NEXT:    LSHL T0.Z, T2.W, PV.Z, BS:VEC_120/SCL_212
; EG-NEXT:    AND_INT T3.W, KC0[4].X, literal.y,
; EG-NEXT:    ADD_INT * T4.W, PV.Y, literal.z,
; EG-NEXT:    32(4.484155e-44), 8388607(1.175494e-38)
; EG-NEXT:    -150(nan), 0(0.000000e+00)
; EG-NEXT:    AND_INT T2.Y, PS, literal.x,
; EG-NEXT:    OR_INT T1.Z, PV.W, literal.y,
; EG-NEXT:    CNDE_INT T3.W, PV.Y, PV.X, PV.Z,
; EG-NEXT:    SETGT_INT * T5.W, T0.X, literal.z,
; EG-NEXT:    31(4.344025e-44), 8388608(1.175494e-38)
; EG-NEXT:    23(3.222986e-44), 0(0.000000e+00)
; EG-NEXT:    CNDE_INT T3.Y, PS, 0.0, PV.W,
; EG-NEXT:    SUB_INT T2.Z, literal.x, T1.W,
; EG-NEXT:    LSHL T1.W, PV.Z, PV.Y,
; EG-NEXT:    AND_INT * T3.W, T4.W, literal.y,
; EG-NEXT:    150(2.101948e-43), 32(4.484155e-44)
; EG-NEXT:    CNDE_INT T1.X, PS, PV.W, 0.0,
; EG-NEXT:    AND_INT T2.Y, PV.Z, literal.x,
; EG-NEXT:    SUB_INT T3.Z, literal.y, T0.Y,
; EG-NEXT:    NOT_INT T4.W, T4.W,
; EG-NEXT:    LSHR * T6.W, T1.Z, 1,
; EG-NEXT:    32(4.484155e-44), 150(2.101948e-43)
; EG-NEXT:    BIT_ALIGN_INT T2.X, 0.0, T2.W, T2.Z,
; EG-NEXT:    ADD_INT T0.Y, T0.Y, literal.x,
; EG-NEXT:    BIT_ALIGN_INT T2.Z, 0.0, PS, PV.W,
; EG-NEXT:    BIT_ALIGN_INT T2.W, 0.0, T1.Z, PV.Z,
; EG-NEXT:    AND_INT * T4.W, PV.Z, literal.y,
; EG-NEXT:    -127(nan), 32(4.484155e-44)
; EG-NEXT:    CNDE_INT T3.X, PS, PV.W, 0.0,
; EG-NEXT:    CNDE_INT T4.Y, T3.W, PV.Z, T1.W,
; EG-NEXT:    SETGT_INT T1.Z, PV.Y, literal.x,
; EG-NEXT:    CNDE_INT T1.W, T1.Y, T0.Z, 0.0,
; EG-NEXT:    CNDE_INT * T2.W, T2.Y, PV.X, 0.0,
; EG-NEXT:    23(3.222986e-44), 0(0.000000e+00)
; EG-NEXT:    CNDE_INT T2.X, T5.W, PS, PV.W,
; EG-NEXT:    ASHR T1.Y, KC0[3].Z, literal.x,
; EG-NEXT:    CNDE_INT T0.Z, PV.Z, 0.0, PV.Y,
; EG-NEXT:    CNDE_INT T1.W, PV.Z, PV.X, T1.X,
; EG-NEXT:    ASHR * T2.W, KC0[4].X, literal.x,
; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT:    XOR_INT T2.Y, PV.W, PS,
; EG-NEXT:    XOR_INT T0.Z, PV.Z, PS,
; EG-NEXT:    XOR_INT T1.W, PV.X, PV.Y,
; EG-NEXT:    XOR_INT * T3.W, T3.Y, PV.Y,
; EG-NEXT:    SUB_INT T3.Y, PS, T1.Y,
; EG-NEXT:    SUBB_UINT T1.Z, PV.W, T1.Y,
; EG-NEXT:    SUB_INT T3.W, PV.Z, T2.W,
; EG-NEXT:    SUBB_UINT * T4.W, PV.Y, T2.W,
; EG-NEXT:    SUB_INT T4.Y, PV.W, PS,
; EG-NEXT:    SUB_INT T0.Z, PV.Y, PV.Z,
; EG-NEXT:    BFE_UINT T3.W, KC0[3].Y, literal.x, T0.W,
; EG-NEXT:    AND_INT * T4.W, KC0[3].Y, literal.y,
; EG-NEXT:    23(3.222986e-44), 8388607(1.175494e-38)
; EG-NEXT:    SETGT_INT T0.X, 0.0, T0.X,
; EG-NEXT:    ADD_INT T3.Y, PV.W, literal.x,
; EG-NEXT:    OR_INT T1.Z, PS, literal.y,
; EG-NEXT:    BFE_UINT T0.W, KC0[3].W, literal.z, T0.W,
; EG-NEXT:    ADD_INT * T4.W, PV.W, literal.w,
; EG-NEXT:    -127(nan), 8388608(1.175494e-38)
; EG-NEXT:    23(3.222986e-44), -150(nan)
; EG-NEXT:    AND_INT T1.X, KC0[3].W, literal.x,
; EG-NEXT:    ADD_INT T5.Y, PV.W, literal.y,
; EG-NEXT:    SUB_INT T2.Z, literal.z, T3.W,
; EG-NEXT:    NOT_INT T3.W, PS,
; EG-NEXT:    LSHR * T5.W, PV.Z, 1,
; EG-NEXT:    8388607(1.175494e-38), -150(nan)
; EG-NEXT:    150(2.101948e-43), 0(0.000000e+00)
; EG-NEXT:    BIT_ALIGN_INT T2.X, 0.0, PS, PV.W,
; EG-NEXT:    AND_INT T6.Y, PV.Z, literal.x,
; EG-NEXT:    AND_INT T3.Z, PV.Y, literal.y,
; EG-NEXT:    OR_INT T3.W, PV.X, literal.z,
; EG-NEXT:    AND_INT * T5.W, T4.W, literal.y,
; EG-NEXT:    32(4.484155e-44), 31(4.344025e-44)
; EG-NEXT:    8388608(1.175494e-38), 0(0.000000e+00)
; EG-NEXT:    BIT_ALIGN_INT T1.X, 0.0, T1.Z, T2.Z,
; EG-NEXT:    LSHL T7.Y, T1.Z, PS,
; EG-NEXT:    AND_INT T1.Z, T4.W, literal.x,
; EG-NEXT:    LSHL T4.W, PV.W, PV.Z,
; EG-NEXT:    AND_INT * T5.W, T5.Y, literal.x,
; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT:    CNDE_INT T3.X, PS, PV.W, 0.0,
; EG-NEXT:    CNDE_INT T8.Y, PV.Z, PV.Y, 0.0,
; EG-NEXT:    CNDE_INT * T2.Z, T6.Y, PV.X, 0.0,
; EG-NEXT:    ALU clause starting at 106:
; EG-NEXT:    CNDE_INT T6.W, T1.Z, T2.X, T7.Y, BS:VEC_021/SCL_122
; EG-NEXT:    SETGT_INT * T7.W, T3.Y, literal.x,
; EG-NEXT:    23(3.222986e-44), 0(0.000000e+00)
; EG-NEXT:    CNDE_INT T1.X, PS, 0.0, PV.W,
; EG-NEXT:    CNDE_INT T6.Y, PS, T2.Z, T8.Y,
; EG-NEXT:    SUB_INT T1.Z, literal.x, T0.W,
; EG-NEXT:    NOT_INT T6.W, T5.Y,
; EG-NEXT:    LSHR * T7.W, T3.W, 1,
; EG-NEXT:    150(2.101948e-43), 0(0.000000e+00)
; EG-NEXT:    ASHR T2.X, KC0[3].Y, literal.x,
; EG-NEXT:    ADD_INT T5.Y, T0.W, literal.y,
; EG-NEXT:    BIT_ALIGN_INT T2.Z, 0.0, PS, PV.W,
; EG-NEXT:    BIT_ALIGN_INT T0.W, 0.0, T3.W, PV.Z,
; EG-NEXT:    AND_INT * T3.W, PV.Z, literal.z,
; EG-NEXT:    31(4.344025e-44), -127(nan)
; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT:    CNDE_INT T4.X, PS, PV.W, 0.0,
; EG-NEXT:    CNDE_INT T7.Y, T5.W, PV.Z, T4.W,
; EG-NEXT:    SETGT_INT T1.Z, PV.Y, literal.x,
; EG-NEXT:    XOR_INT T0.W, T6.Y, PV.X,
; EG-NEXT:    XOR_INT * T3.W, T1.X, PV.X,
; EG-NEXT:    23(3.222986e-44), 0(0.000000e+00)
; EG-NEXT:    SUB_INT T1.X, PS, T2.X,
; EG-NEXT:    SUBB_UINT T6.Y, PV.W, T2.X,
; EG-NEXT:    CNDE_INT T2.Z, PV.Z, 0.0, PV.Y,
; EG-NEXT:    CNDE_INT T3.W, PV.Z, PV.X, T3.X,
; EG-NEXT:    ASHR * T4.W, KC0[3].W, literal.x,
; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT:    XOR_INT T3.X, PV.W, PS,
; EG-NEXT:    XOR_INT T7.Y, PV.Z, PS,
; EG-NEXT:    SUB_INT T1.Z, PV.X, PV.Y,
; EG-NEXT:    SETGT_INT T3.W, 0.0, T3.Y,
; EG-NEXT:    CNDE_INT * T6.W, T0.X, T0.Z, 0.0,
; EG-NEXT:    SETGT_INT T1.X, 0.0, T0.Y,
; EG-NEXT:    CNDE_INT T6.Y, PV.W, PV.Z, 0.0,
; EG-NEXT:    SUB_INT T0.Z, T1.W, T1.Y, BS:VEC_021/SCL_122
; EG-NEXT:    SUB_INT T1.W, PV.Y, T4.W,
; EG-NEXT:    SUBB_UINT * T5.W, PV.X, T4.W,
; EG-NEXT:    SUB_INT T4.X, PV.W, PS,
; EG-NEXT:    SETGT_INT T0.Y, 0.0, T5.Y, BS:VEC_021/SCL_122
; EG-NEXT:    CNDE_INT T6.Z, T0.X, PV.Z, 0.0,
; EG-NEXT:    SUB_INT T0.W, T0.W, T2.X,
; EG-NEXT:    CNDE_INT * T1.W, PV.X, T4.Y, 0.0,
; EG-NEXT:    CNDE_INT T6.X, T3.W, PV.W, 0.0,
; EG-NEXT:    CNDE_INT T1.Y, PV.Y, PV.X, 0.0,
; EG-NEXT:    SUB_INT T0.W, T2.Y, T2.W,
; EG-NEXT:    LSHR * T0.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:    CNDE_INT T1.Z, T1.X, PV.W, 0.0,
; EG-NEXT:    SUB_INT * T0.W, T3.X, T4.W, BS:VEC_120/SCL_212
; EG-NEXT:    CNDE_INT T1.X, T0.Y, PV.W, 0.0,
; EG-NEXT:    ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT:    LSHR * T2.X, PV.W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %conv = fptoui <4 x float> %x to <4 x i64>
  store <4 x i64> %conv, ptr addrspace(1) %out
  ret void
}

; Scalar f32 -> i1. The checks expect a compare-equal against 1.0 feeding a
; 0/1 select (SETE_DX10 plus AND_INT on EG) instead of a conversion instruction.
define amdgpu_kernel void @fp_to_uint_f32_to_i1(ptr addrspace(1) %out, float %in) #0 {
; SI-LABEL: fp_to_uint_f32_to_i1:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s6, s[4:5], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_eq_f32_e64 s[4:5], 1.0, s6
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; SI-NEXT:    buffer_store_byte v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: fp_to_uint_f32_to_i1:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s6, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_eq_f32_e64 s[4:5], 1.0, s6
; VI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; VI-NEXT:    buffer_store_byte v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
;
; EG-LABEL: fp_to_uint_f32_to_i1:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 12, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    ALU clause starting at 4:
; EG-NEXT:    AND_INT T0.W, KC0[2].Y, literal.x,
; EG-NEXT:    SETE_DX10 * T1.W, KC0[2].Z, 1.0,
; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT:    AND_INT T1.W, PS, 1,
; EG-NEXT:    LSHL * T0.W, PV.W, literal.x,
; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT:    LSHL T0.X, PV.W, PS,
; EG-NEXT:    LSHL * T0.W, literal.x, PS,
; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
; EG-NEXT:    MOV T0.Y, 0.0,
; EG-NEXT:    MOV * T0.Z, 0.0,
; EG-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %conv = fptoui float %in to i1
  store i1 %conv, ptr addrspace(1) %out
  ret void
}

; As above, with llvm.fabs applied to the input first. The checks expect the
; absolute value to appear as a source modifier on the compare (|s6| on SI and
; VI, |KC0[2].Z| on EG).
define amdgpu_kernel void @fp_to_uint_fabs_f32_to_i1(ptr addrspace(1) %out, float %in) #0 {
; SI-LABEL: fp_to_uint_fabs_f32_to_i1:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s6, s[4:5], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_eq_f32_e64 s[4:5], 1.0, |s6|
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; SI-NEXT:    buffer_store_byte v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: fp_to_uint_fabs_f32_to_i1:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s6, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_eq_f32_e64 s[4:5], 1.0, |s6|
; VI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; VI-NEXT:    buffer_store_byte v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
;
; EG-LABEL: fp_to_uint_fabs_f32_to_i1:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 12, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    ALU clause starting at 4:
; EG-NEXT:    AND_INT T0.W, KC0[2].Y, literal.x,
; EG-NEXT:    SETE_DX10 * T1.W, |KC0[2].Z|, 1.0,
; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT:    AND_INT T1.W, PS, 1,
; EG-NEXT:    LSHL * T0.W, PV.W, literal.x,
; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT:    LSHL T0.X, PV.W, PS,
; EG-NEXT:    LSHL * T0.W, literal.x, PS,
; EG-NEXT:    255(3.573311e-43), 0(0.000000e+00)
; EG-NEXT:    MOV T0.Y, 0.0,
; EG-NEXT:    MOV * T0.Z, 0.0,
; EG-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %in.fabs = call float @llvm.fabs.f32(float %in)
  %conv = fptoui float %in.fabs to i1
  store i1 %conv, ptr addrspace(1) %out
  ret void
}

; Scalar f32 -> i16. The conversion instructions match the i32 case; SI and VI
; expect buffer_store_short, and EG expects a masked MEM_RAT MSKOR store using
; the literal 65535.
define amdgpu_kernel void @fp_to_uint_f32_to_i16(ptr addrspace(1) %out, float %in) #0 {
; SI-LABEL: fp_to_uint_f32_to_i16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s6, s[4:5], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cvt_u32_f32_e32 v0, s6
; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: fp_to_uint_f32_to_i16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cvt_u32_f32_e32 v0, s2
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
;
; EG-LABEL: fp_to_uint_f32_to_i16:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 12, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    ALU clause starting at 4:
; EG-NEXT:    TRUNC T0.W, KC0[2].Z,
; EG-NEXT:    AND_INT * T1.W, KC0[2].Y, literal.x,
; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT:    LSHL T1.W, PS, literal.x,
; EG-NEXT:    FLT_TO_UINT * T0.X, PV.W,
; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT:    LSHL T0.X, PS, PV.W,
; EG-NEXT:    LSHL * T0.W, literal.x, PV.W,
; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
; EG-NEXT:    MOV T0.Y, 0.0,
; EG-NEXT:    MOV * T0.Z, 0.0,
; EG-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %uint = fptoui float %in to i16
  store i16 %uint, ptr addrspace(1) %out
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }