; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck -check-prefix=GFX78 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefix=GFX78 %s

; Checks GlobalISel code generation for the llvm.floor.f64 intrinsic on three
; subtargets. The tahiti invocation is checked under the GFX6 prefix; bonaire
; and fiji share the GFX78 prefix.
;
; Expected shapes, as recorded in the assertions below:
;   * GFX78 prefix - a single v_floor_f64 instruction.
;   * GFX6 prefix  - an expanded sequence: v_fract_f64, v_min_f64 against the
;     64-bit constant assembled from -1 and 0x3fefffff (presumably the largest
;     double below 1.0 - TODO confirm), an ordered self-compare (v_cmp_o_f64)
;     feeding a pair of v_cndmask_b32, and finally v_add_f64 of the input with
;     the negated intermediate. Calls carrying the nnan flag drop the
;     compare/select pair.
;
; Functions named v_* use the default calling convention (value in v[0:1]);
; functions named s_* are amdgpu_ps shaders taking the value as an inreg
; argument (seen in s[2:3]) and returning the result bitcast to <2 x float>.
;
; NOTE(review): in the tahiti checks without nnan, the v_cndmask_b32 pair lists
; the original input as its second source while the condition comes from
; v_cmp_o (ordered). If v_cndmask selects the second source when the condition
; bit is set, the non-NaN path would subtract the input from itself instead of
; the clamped fract. Confirm against the ISA manual and the compiler's f64 floor
; lowering before treating these autogenerated lines as the intended output.
;
; The assertion lines are generated; regenerate them with the script named on
; the first line instead of editing them by hand.

; Baseline: plain floor of a VGPR argument, default function attributes.
define double @v_floor_f64_ieee(double %x) {
; GFX6-LABEL: v_floor_f64_ieee:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
; GFX6-NEXT: v_mov_b32_e32 v4, -1
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX78-LABEL: v_floor_f64_ieee:
; GFX78: ; %bb.0:
; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT: v_floor_f64_e32 v[0:1], v[0:1]
; GFX78-NEXT: s_setpc_b64 s[30:31]
  %result = call double @llvm.floor.f64(double %x)
  ret double %result
}

; Same as the baseline but the call carries the nnan fast-math flag; the
; tahiti expectation omits the v_cmp_o/v_cndmask pair.
define double @v_floor_f64_ieee_nnan(double %x) {
; GFX6-LABEL: v_floor_f64_ieee_nnan:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
; GFX6-NEXT: v_mov_b32_e32 v4, -1
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX78-LABEL: v_floor_f64_ieee_nnan:
; GFX78: ; %bb.0:
; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT: v_floor_f64_e32 v[0:1], v[0:1]
; GFX78-NEXT: s_setpc_b64 s[30:31]
  %result = call nnan double @llvm.floor.f64(double %x)
  ret double %result
}

; floor(-x): the fneg shows up as a negate source modifier on v_fract/v_add
; (tahiti) and on v_floor (bonaire/fiji) rather than as a separate instruction.
define double @v_floor_f64_ieee_fneg(double %x) {
; GFX6-LABEL: v_floor_f64_ieee_fneg:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e64 v[2:3], -v[0:1]
; GFX6-NEXT: v_mov_b32_e32 v4, -1
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX6-NEXT: v_add_f64 v[0:1], -v[0:1], -v[2:3]
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX78-LABEL: v_floor_f64_ieee_fneg:
; GFX78: ; %bb.0:
; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT: v_floor_f64_e64 v[0:1], -v[0:1]
; GFX78-NEXT: s_setpc_b64 s[30:31]
  %neg.x = fneg double %x
  %result = call double @llvm.floor.f64(double %neg.x)
  ret double %result
}

; Baseline repeated with attribute group #1 ("amdgpu-ieee"="false"). The
; recorded instruction sequences match the default-attribute version.
define double @v_floor_f64_nonieee(double %x) #1 {
; GFX6-LABEL: v_floor_f64_nonieee:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
; GFX6-NEXT: v_mov_b32_e32 v4, -1
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX78-LABEL: v_floor_f64_nonieee:
; GFX78: ; %bb.0:
; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT: v_floor_f64_e32 v[0:1], v[0:1]
; GFX78-NEXT: s_setpc_b64 s[30:31]
  %result = call double @llvm.floor.f64(double %x)
  ret double %result
}

; nnan call combined with attribute group #1; as in the default-attribute nnan
; case, the tahiti expectation has no compare/select pair.
define double @v_floor_f64_nonieee_nnan(double %x) #1 {
; GFX6-LABEL: v_floor_f64_nonieee_nnan:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1]
; GFX6-NEXT: v_mov_b32_e32 v4, -1
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX78-LABEL: v_floor_f64_nonieee_nnan:
; GFX78: ; %bb.0:
; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT: v_floor_f64_e32 v[0:1], v[0:1]
; GFX78-NEXT: s_setpc_b64 s[30:31]
  %result = call nnan double @llvm.floor.f64(double %x)
  ret double %result
}

; floor(-x) combined with attribute group #1. Note the name is spelled
; "non_ieee" here, unlike the "nonieee" spelling of the two functions above.
define double @v_floor_f64_non_ieee_fneg(double %x) #1 {
; GFX6-LABEL: v_floor_f64_non_ieee_fneg:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e64 v[2:3], -v[0:1]
; GFX6-NEXT: v_mov_b32_e32 v4, -1
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX6-NEXT: v_add_f64 v[0:1], -v[0:1], -v[2:3]
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX78-LABEL: v_floor_f64_non_ieee_fneg:
; GFX78: ; %bb.0:
; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT: v_floor_f64_e64 v[0:1], -v[0:1]
; GFX78-NEXT: s_setpc_b64 s[30:31]
  %neg.x = fneg double %x
  %result = call double @llvm.floor.f64(double %neg.x)
  ret double %result
}

; floor(|x|): the fabs appears as an absolute-value source modifier.
define double @v_floor_f64_fabs(double %x) {
; GFX6-LABEL: v_floor_f64_fabs:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e64 v[2:3], |v[0:1]|
; GFX6-NEXT: v_mov_b32_e32 v4, -1
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX6-NEXT: v_add_f64 v[0:1], |v[0:1]|, -v[2:3]
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX78-LABEL: v_floor_f64_fabs:
; GFX78: ; %bb.0:
; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT: v_floor_f64_e64 v[0:1], |v[0:1]|
; GFX78-NEXT: s_setpc_b64 s[30:31]
  %abs.x = call double @llvm.fabs.f64(double %x)
  %result = call double @llvm.floor.f64(double %abs.x)
  ret double %result
}

; floor(-|x|): both modifiers are expected to fold onto the same operand.
define double @v_floor_f64_fneg_fabs(double %x) {
; GFX6-LABEL: v_floor_f64_fneg_fabs:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fract_f64_e64 v[2:3], -|v[0:1]|
; GFX6-NEXT: v_mov_b32_e32 v4, -1
; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT: v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX6-NEXT: v_add_f64 v[0:1], -|v[0:1]|, -v[2:3]
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX78-LABEL: v_floor_f64_fneg_fabs:
; GFX78: ; %bb.0:
; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT: v_floor_f64_e64 v[0:1], -|v[0:1]|
; GFX78-NEXT: s_setpc_b64 s[30:31]
  %abs.x = call double @llvm.fabs.f64(double %x)
  %neg.abs.x = fneg double %abs.x
  %result = call double @llvm.floor.f64(double %neg.abs.x)
  ret double %result
}

; Scalar-input variant: amdgpu_ps shader with an inreg argument. The tahiti
; expectation copies s2/s3 into VGPRs before the v_cndmask pair; the result is
; returned as <2 x float> via bitcast.
define amdgpu_ps <2 x float> @s_floor_f64(double inreg %x) {
; GFX6-LABEL: s_floor_f64:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_fract_f64_e32 v[0:1], s[2:3]
; GFX6-NEXT: v_mov_b32_e32 v2, -1
; GFX6-NEXT: v_mov_b32_e32 v3, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
; GFX6-NEXT: v_mov_b32_e32 v2, s2
; GFX6-NEXT: v_mov_b32_e32 v3, s3
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX6-NEXT: v_add_f64 v[0:1], s[2:3], -v[0:1]
; GFX6-NEXT: ; return to shader part epilog
;
; GFX78-LABEL: s_floor_f64:
; GFX78: ; %bb.0:
; GFX78-NEXT: v_floor_f64_e32 v[0:1], s[2:3]
; GFX78-NEXT: ; return to shader part epilog
  %result = call double @llvm.floor.f64(double %x)
  %cast = bitcast double %result to <2 x float>
  ret <2 x float> %cast
}

; Scalar-input floor(-x); negate modifier applied directly to the SGPR pair.
define amdgpu_ps <2 x float> @s_floor_f64_fneg(double inreg %x) {
; GFX6-LABEL: s_floor_f64_fneg:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_fract_f64_e64 v[0:1], -s[2:3]
; GFX6-NEXT: v_mov_b32_e32 v2, -1
; GFX6-NEXT: v_mov_b32_e32 v3, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
; GFX6-NEXT: v_mov_b32_e32 v2, s2
; GFX6-NEXT: v_mov_b32_e32 v3, s3
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX6-NEXT: v_add_f64 v[0:1], -s[2:3], -v[0:1]
; GFX6-NEXT: ; return to shader part epilog
;
; GFX78-LABEL: s_floor_f64_fneg:
; GFX78: ; %bb.0:
; GFX78-NEXT: v_floor_f64_e64 v[0:1], -s[2:3]
; GFX78-NEXT: ; return to shader part epilog
  %neg.x = fneg double %x
  %result = call double @llvm.floor.f64(double %neg.x)
  %cast = bitcast double %result to <2 x float>
  ret <2 x float> %cast
}

; Scalar-input floor(|x|); absolute-value modifier applied to the SGPR pair.
define amdgpu_ps <2 x float> @s_floor_f64_fabs(double inreg %x) {
; GFX6-LABEL: s_floor_f64_fabs:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_fract_f64_e64 v[0:1], |s[2:3]|
; GFX6-NEXT: v_mov_b32_e32 v2, -1
; GFX6-NEXT: v_mov_b32_e32 v3, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
; GFX6-NEXT: v_mov_b32_e32 v2, s2
; GFX6-NEXT: v_mov_b32_e32 v3, s3
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX6-NEXT: v_add_f64 v[0:1], |s[2:3]|, -v[0:1]
; GFX6-NEXT: ; return to shader part epilog
;
; GFX78-LABEL: s_floor_f64_fabs:
; GFX78: ; %bb.0:
; GFX78-NEXT: v_floor_f64_e64 v[0:1], |s[2:3]|
; GFX78-NEXT: ; return to shader part epilog
  %abs.x = call double @llvm.fabs.f64(double %x)
  %result = call double @llvm.floor.f64(double %abs.x)
  %cast = bitcast double %result to <2 x float>
  ret <2 x float> %cast
}

; Scalar-input floor(-|x|); both modifiers applied to the SGPR pair.
define amdgpu_ps <2 x float> @s_floor_f64_fneg_fabs(double inreg %x) {
; GFX6-LABEL: s_floor_f64_fneg_fabs:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_fract_f64_e64 v[0:1], -|s[2:3]|
; GFX6-NEXT: v_mov_b32_e32 v2, -1
; GFX6-NEXT: v_mov_b32_e32 v3, 0x3fefffff
; GFX6-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX6-NEXT: v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
; GFX6-NEXT: v_mov_b32_e32 v2, s2
; GFX6-NEXT: v_mov_b32_e32 v3, s3
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX6-NEXT: v_add_f64 v[0:1], -|s[2:3]|, -v[0:1]
; GFX6-NEXT: ; return to shader part epilog
;
; GFX78-LABEL: s_floor_f64_fneg_fabs:
; GFX78: ; %bb.0:
; GFX78-NEXT: v_floor_f64_e64 v[0:1], -|s[2:3]|
; GFX78-NEXT: ; return to shader part epilog
  %abs.x = call double @llvm.fabs.f64(double %x)
  %neg.abs.x = fneg double %abs.x
  %result = call double @llvm.floor.f64(double %neg.abs.x)
  %cast = bitcast double %result to <2 x float>
  ret <2 x float> %cast
}

; Intrinsics exercised by the tests above.
declare double @llvm.floor.f64(double) #0
declare double @llvm.fabs.f64(double) #0

; #0 is attached to the intrinsic declarations; #1 is attached to the
; "nonieee"/"non_ieee" test functions and sets "amdgpu-ieee" to "false".
attributes #0 = { nounwind readnone speculatable willreturn }
attributes #1 = { "amdgpu-ieee"="false" }