1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -global-isel -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s 3; RUN: llc -global-isel -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s 4; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s 5; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s 6; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s 7; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s 8; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=SDAG_GFX6 %s 9; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefix=SDAG_GFX7 %s 10; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=SDAG_GFX8 %s 11; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=SDAG_GFX9 %s 12; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=SDAG_GFX10PLUS,SDAG_GFX10 %s 13; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=SDAG_GFX10PLUS,SDAG_GFX11 %s 14 15define float @v_roundeven_f32(float %x) { 16; GFX6-LABEL: v_roundeven_f32: 17; GFX6: ; %bb.0: 18; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 19; GFX6-NEXT: v_rndne_f32_e32 v0, v0 20; GFX6-NEXT: s_setpc_b64 s[30:31] 21; 22; GFX7-LABEL: v_roundeven_f32: 23; GFX7: ; %bb.0: 24; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25; GFX7-NEXT: v_rndne_f32_e32 v0, v0 26; GFX7-NEXT: s_setpc_b64 s[30:31] 27; 28; GFX8-LABEL: v_roundeven_f32: 29; GFX8: ; %bb.0: 30; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 31; GFX8-NEXT: v_rndne_f32_e32 v0, v0 32; GFX8-NEXT: s_setpc_b64 s[30:31] 33; 34; GFX9-LABEL: v_roundeven_f32: 35; 
GFX9: ; %bb.0: 36; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 37; GFX9-NEXT: v_rndne_f32_e32 v0, v0 38; GFX9-NEXT: s_setpc_b64 s[30:31] 39; 40; GFX10PLUS-LABEL: v_roundeven_f32: 41; GFX10PLUS: ; %bb.0: 42; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 43; GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0 44; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 45; 46; SDAG_GFX6-LABEL: v_roundeven_f32: 47; SDAG_GFX6: ; %bb.0: 48; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 49; SDAG_GFX6-NEXT: v_rndne_f32_e32 v0, v0 50; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31] 51; 52; SDAG_GFX7-LABEL: v_roundeven_f32: 53; SDAG_GFX7: ; %bb.0: 54; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 55; SDAG_GFX7-NEXT: v_rndne_f32_e32 v0, v0 56; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31] 57; 58; SDAG_GFX8-LABEL: v_roundeven_f32: 59; SDAG_GFX8: ; %bb.0: 60; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 61; SDAG_GFX8-NEXT: v_rndne_f32_e32 v0, v0 62; SDAG_GFX8-NEXT: s_setpc_b64 s[30:31] 63; 64; SDAG_GFX9-LABEL: v_roundeven_f32: 65; SDAG_GFX9: ; %bb.0: 66; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 67; SDAG_GFX9-NEXT: v_rndne_f32_e32 v0, v0 68; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31] 69; 70; SDAG_GFX10PLUS-LABEL: v_roundeven_f32: 71; SDAG_GFX10PLUS: ; %bb.0: 72; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 73; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0 74; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 75 %roundeven = call float @llvm.roundeven.f32(float %x) 76 ret float %roundeven 77} 78 79define <2 x float> @v_roundeven_v2f32(<2 x float> %x) { 80; GFX6-LABEL: v_roundeven_v2f32: 81; GFX6: ; %bb.0: 82; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 83; GFX6-NEXT: v_rndne_f32_e32 v0, v0 84; GFX6-NEXT: v_rndne_f32_e32 v1, v1 85; GFX6-NEXT: s_setpc_b64 s[30:31] 86; 87; GFX7-LABEL: v_roundeven_v2f32: 88; GFX7: ; %bb.0: 89; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 90; GFX7-NEXT: v_rndne_f32_e32 v0, v0 91; GFX7-NEXT: v_rndne_f32_e32 v1, v1 92; GFX7-NEXT: 
s_setpc_b64 s[30:31] 93; 94; GFX8-LABEL: v_roundeven_v2f32: 95; GFX8: ; %bb.0: 96; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 97; GFX8-NEXT: v_rndne_f32_e32 v0, v0 98; GFX8-NEXT: v_rndne_f32_e32 v1, v1 99; GFX8-NEXT: s_setpc_b64 s[30:31] 100; 101; GFX9-LABEL: v_roundeven_v2f32: 102; GFX9: ; %bb.0: 103; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 104; GFX9-NEXT: v_rndne_f32_e32 v0, v0 105; GFX9-NEXT: v_rndne_f32_e32 v1, v1 106; GFX9-NEXT: s_setpc_b64 s[30:31] 107; 108; GFX10PLUS-LABEL: v_roundeven_v2f32: 109; GFX10PLUS: ; %bb.0: 110; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 111; GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0 112; GFX10PLUS-NEXT: v_rndne_f32_e32 v1, v1 113; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 114; 115; SDAG_GFX6-LABEL: v_roundeven_v2f32: 116; SDAG_GFX6: ; %bb.0: 117; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 118; SDAG_GFX6-NEXT: v_rndne_f32_e32 v0, v0 119; SDAG_GFX6-NEXT: v_rndne_f32_e32 v1, v1 120; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31] 121; 122; SDAG_GFX7-LABEL: v_roundeven_v2f32: 123; SDAG_GFX7: ; %bb.0: 124; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 125; SDAG_GFX7-NEXT: v_rndne_f32_e32 v0, v0 126; SDAG_GFX7-NEXT: v_rndne_f32_e32 v1, v1 127; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31] 128; 129; SDAG_GFX8-LABEL: v_roundeven_v2f32: 130; SDAG_GFX8: ; %bb.0: 131; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 132; SDAG_GFX8-NEXT: v_rndne_f32_e32 v0, v0 133; SDAG_GFX8-NEXT: v_rndne_f32_e32 v1, v1 134; SDAG_GFX8-NEXT: s_setpc_b64 s[30:31] 135; 136; SDAG_GFX9-LABEL: v_roundeven_v2f32: 137; SDAG_GFX9: ; %bb.0: 138; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 139; SDAG_GFX9-NEXT: v_rndne_f32_e32 v0, v0 140; SDAG_GFX9-NEXT: v_rndne_f32_e32 v1, v1 141; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31] 142; 143; SDAG_GFX10PLUS-LABEL: v_roundeven_v2f32: 144; SDAG_GFX10PLUS: ; %bb.0: 145; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 146; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0 147; 
SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v1, v1 148; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 149 %roundeven = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %x) 150 ret <2 x float> %roundeven 151} 152 153define <3 x float> @v_roundeven_v3f32(<3 x float> %x) { 154; GFX6-LABEL: v_roundeven_v3f32: 155; GFX6: ; %bb.0: 156; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 157; GFX6-NEXT: v_rndne_f32_e32 v0, v0 158; GFX6-NEXT: v_rndne_f32_e32 v1, v1 159; GFX6-NEXT: v_rndne_f32_e32 v2, v2 160; GFX6-NEXT: s_setpc_b64 s[30:31] 161; 162; GFX7-LABEL: v_roundeven_v3f32: 163; GFX7: ; %bb.0: 164; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 165; GFX7-NEXT: v_rndne_f32_e32 v0, v0 166; GFX7-NEXT: v_rndne_f32_e32 v1, v1 167; GFX7-NEXT: v_rndne_f32_e32 v2, v2 168; GFX7-NEXT: s_setpc_b64 s[30:31] 169; 170; GFX8-LABEL: v_roundeven_v3f32: 171; GFX8: ; %bb.0: 172; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 173; GFX8-NEXT: v_rndne_f32_e32 v0, v0 174; GFX8-NEXT: v_rndne_f32_e32 v1, v1 175; GFX8-NEXT: v_rndne_f32_e32 v2, v2 176; GFX8-NEXT: s_setpc_b64 s[30:31] 177; 178; GFX9-LABEL: v_roundeven_v3f32: 179; GFX9: ; %bb.0: 180; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 181; GFX9-NEXT: v_rndne_f32_e32 v0, v0 182; GFX9-NEXT: v_rndne_f32_e32 v1, v1 183; GFX9-NEXT: v_rndne_f32_e32 v2, v2 184; GFX9-NEXT: s_setpc_b64 s[30:31] 185; 186; GFX10PLUS-LABEL: v_roundeven_v3f32: 187; GFX10PLUS: ; %bb.0: 188; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 189; GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0 190; GFX10PLUS-NEXT: v_rndne_f32_e32 v1, v1 191; GFX10PLUS-NEXT: v_rndne_f32_e32 v2, v2 192; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 193; 194; SDAG_GFX6-LABEL: v_roundeven_v3f32: 195; SDAG_GFX6: ; %bb.0: 196; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 197; SDAG_GFX6-NEXT: v_rndne_f32_e32 v0, v0 198; SDAG_GFX6-NEXT: v_rndne_f32_e32 v1, v1 199; SDAG_GFX6-NEXT: v_rndne_f32_e32 v2, v2 200; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31] 201; 202; SDAG_GFX7-LABEL: v_roundeven_v3f32: 
203; SDAG_GFX7: ; %bb.0: 204; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 205; SDAG_GFX7-NEXT: v_rndne_f32_e32 v0, v0 206; SDAG_GFX7-NEXT: v_rndne_f32_e32 v1, v1 207; SDAG_GFX7-NEXT: v_rndne_f32_e32 v2, v2 208; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31] 209; 210; SDAG_GFX8-LABEL: v_roundeven_v3f32: 211; SDAG_GFX8: ; %bb.0: 212; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 213; SDAG_GFX8-NEXT: v_rndne_f32_e32 v0, v0 214; SDAG_GFX8-NEXT: v_rndne_f32_e32 v1, v1 215; SDAG_GFX8-NEXT: v_rndne_f32_e32 v2, v2 216; SDAG_GFX8-NEXT: s_setpc_b64 s[30:31] 217; 218; SDAG_GFX9-LABEL: v_roundeven_v3f32: 219; SDAG_GFX9: ; %bb.0: 220; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 221; SDAG_GFX9-NEXT: v_rndne_f32_e32 v0, v0 222; SDAG_GFX9-NEXT: v_rndne_f32_e32 v1, v1 223; SDAG_GFX9-NEXT: v_rndne_f32_e32 v2, v2 224; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31] 225; 226; SDAG_GFX10PLUS-LABEL: v_roundeven_v3f32: 227; SDAG_GFX10PLUS: ; %bb.0: 228; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 229; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0 230; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v1, v1 231; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v2, v2 232; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 233 %roundeven = call <3 x float> @llvm.roundeven.v3f32(<3 x float> %x) 234 ret <3 x float> %roundeven 235} 236 237define <4 x float> @v_roundeven_v4f32(<4 x float> %x) { 238; GFX6-LABEL: v_roundeven_v4f32: 239; GFX6: ; %bb.0: 240; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 241; GFX6-NEXT: v_rndne_f32_e32 v0, v0 242; GFX6-NEXT: v_rndne_f32_e32 v1, v1 243; GFX6-NEXT: v_rndne_f32_e32 v2, v2 244; GFX6-NEXT: v_rndne_f32_e32 v3, v3 245; GFX6-NEXT: s_setpc_b64 s[30:31] 246; 247; GFX7-LABEL: v_roundeven_v4f32: 248; GFX7: ; %bb.0: 249; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 250; GFX7-NEXT: v_rndne_f32_e32 v0, v0 251; GFX7-NEXT: v_rndne_f32_e32 v1, v1 252; GFX7-NEXT: v_rndne_f32_e32 v2, v2 253; GFX7-NEXT: v_rndne_f32_e32 v3, v3 254; GFX7-NEXT: s_setpc_b64 s[30:31] 
255; 256; GFX8-LABEL: v_roundeven_v4f32: 257; GFX8: ; %bb.0: 258; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 259; GFX8-NEXT: v_rndne_f32_e32 v0, v0 260; GFX8-NEXT: v_rndne_f32_e32 v1, v1 261; GFX8-NEXT: v_rndne_f32_e32 v2, v2 262; GFX8-NEXT: v_rndne_f32_e32 v3, v3 263; GFX8-NEXT: s_setpc_b64 s[30:31] 264; 265; GFX9-LABEL: v_roundeven_v4f32: 266; GFX9: ; %bb.0: 267; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 268; GFX9-NEXT: v_rndne_f32_e32 v0, v0 269; GFX9-NEXT: v_rndne_f32_e32 v1, v1 270; GFX9-NEXT: v_rndne_f32_e32 v2, v2 271; GFX9-NEXT: v_rndne_f32_e32 v3, v3 272; GFX9-NEXT: s_setpc_b64 s[30:31] 273; 274; GFX10PLUS-LABEL: v_roundeven_v4f32: 275; GFX10PLUS: ; %bb.0: 276; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 277; GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0 278; GFX10PLUS-NEXT: v_rndne_f32_e32 v1, v1 279; GFX10PLUS-NEXT: v_rndne_f32_e32 v2, v2 280; GFX10PLUS-NEXT: v_rndne_f32_e32 v3, v3 281; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 282; 283; SDAG_GFX6-LABEL: v_roundeven_v4f32: 284; SDAG_GFX6: ; %bb.0: 285; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 286; SDAG_GFX6-NEXT: v_rndne_f32_e32 v0, v0 287; SDAG_GFX6-NEXT: v_rndne_f32_e32 v1, v1 288; SDAG_GFX6-NEXT: v_rndne_f32_e32 v2, v2 289; SDAG_GFX6-NEXT: v_rndne_f32_e32 v3, v3 290; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31] 291; 292; SDAG_GFX7-LABEL: v_roundeven_v4f32: 293; SDAG_GFX7: ; %bb.0: 294; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 295; SDAG_GFX7-NEXT: v_rndne_f32_e32 v0, v0 296; SDAG_GFX7-NEXT: v_rndne_f32_e32 v1, v1 297; SDAG_GFX7-NEXT: v_rndne_f32_e32 v2, v2 298; SDAG_GFX7-NEXT: v_rndne_f32_e32 v3, v3 299; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31] 300; 301; SDAG_GFX8-LABEL: v_roundeven_v4f32: 302; SDAG_GFX8: ; %bb.0: 303; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 304; SDAG_GFX8-NEXT: v_rndne_f32_e32 v0, v0 305; SDAG_GFX8-NEXT: v_rndne_f32_e32 v1, v1 306; SDAG_GFX8-NEXT: v_rndne_f32_e32 v2, v2 307; SDAG_GFX8-NEXT: v_rndne_f32_e32 v3, v3 308; SDAG_GFX8-NEXT: 
s_setpc_b64 s[30:31] 309; 310; SDAG_GFX9-LABEL: v_roundeven_v4f32: 311; SDAG_GFX9: ; %bb.0: 312; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 313; SDAG_GFX9-NEXT: v_rndne_f32_e32 v0, v0 314; SDAG_GFX9-NEXT: v_rndne_f32_e32 v1, v1 315; SDAG_GFX9-NEXT: v_rndne_f32_e32 v2, v2 316; SDAG_GFX9-NEXT: v_rndne_f32_e32 v3, v3 317; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31] 318; 319; SDAG_GFX10PLUS-LABEL: v_roundeven_v4f32: 320; SDAG_GFX10PLUS: ; %bb.0: 321; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 322; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v0, v0 323; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v1, v1 324; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v2, v2 325; SDAG_GFX10PLUS-NEXT: v_rndne_f32_e32 v3, v3 326; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 327 %roundeven = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %x) 328 ret <4 x float> %roundeven 329} 330 331define half @v_roundeven_f16(half %x) { 332; GFX6-LABEL: v_roundeven_f16: 333; GFX6: ; %bb.0: 334; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 335; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 336; GFX6-NEXT: v_rndne_f32_e32 v0, v0 337; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 338; GFX6-NEXT: s_setpc_b64 s[30:31] 339; 340; GFX7-LABEL: v_roundeven_f16: 341; GFX7: ; %bb.0: 342; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 343; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 344; GFX7-NEXT: v_rndne_f32_e32 v0, v0 345; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 346; GFX7-NEXT: s_setpc_b64 s[30:31] 347; 348; GFX8-LABEL: v_roundeven_f16: 349; GFX8: ; %bb.0: 350; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 351; GFX8-NEXT: v_rndne_f16_e32 v0, v0 352; GFX8-NEXT: s_setpc_b64 s[30:31] 353; 354; GFX9-LABEL: v_roundeven_f16: 355; GFX9: ; %bb.0: 356; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 357; GFX9-NEXT: v_rndne_f16_e32 v0, v0 358; GFX9-NEXT: s_setpc_b64 s[30:31] 359; 360; GFX10PLUS-LABEL: v_roundeven_f16: 361; GFX10PLUS: ; %bb.0: 362; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 363; GFX10PLUS-NEXT: 
v_rndne_f16_e32 v0, v0 364; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 365; 366; SDAG_GFX6-LABEL: v_roundeven_f16: 367; SDAG_GFX6: ; %bb.0: 368; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 369; SDAG_GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 370; SDAG_GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 371; SDAG_GFX6-NEXT: v_rndne_f32_e32 v0, v0 372; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31] 373; 374; SDAG_GFX7-LABEL: v_roundeven_f16: 375; SDAG_GFX7: ; %bb.0: 376; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 377; SDAG_GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 378; SDAG_GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 379; SDAG_GFX7-NEXT: v_rndne_f32_e32 v0, v0 380; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31] 381; 382; SDAG_GFX8-LABEL: v_roundeven_f16: 383; SDAG_GFX8: ; %bb.0: 384; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 385; SDAG_GFX8-NEXT: v_rndne_f16_e32 v0, v0 386; SDAG_GFX8-NEXT: s_setpc_b64 s[30:31] 387; 388; SDAG_GFX9-LABEL: v_roundeven_f16: 389; SDAG_GFX9: ; %bb.0: 390; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 391; SDAG_GFX9-NEXT: v_rndne_f16_e32 v0, v0 392; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31] 393; 394; SDAG_GFX10PLUS-LABEL: v_roundeven_f16: 395; SDAG_GFX10PLUS: ; %bb.0: 396; SDAG_GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 397; SDAG_GFX10PLUS-NEXT: v_rndne_f16_e32 v0, v0 398; SDAG_GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 399 %roundeven = call half @llvm.roundeven.f16(half %x) 400 ret half %roundeven 401} 402 403define <2 x half> @v_roundeven_v2f16(<2 x half> %x) { 404; GFX6-LABEL: v_roundeven_v2f16: 405; GFX6: ; %bb.0: 406; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 407; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 408; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 409; GFX6-NEXT: v_rndne_f32_e32 v0, v0 410; GFX6-NEXT: v_rndne_f32_e32 v1, v1 411; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 412; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 413; GFX6-NEXT: s_setpc_b64 s[30:31] 414; 415; GFX7-LABEL: v_roundeven_v2f16: 416; GFX7: ; %bb.0: 417; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 418; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 419; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 420; GFX7-NEXT: v_rndne_f32_e32 v0, v0 421; GFX7-NEXT: v_rndne_f32_e32 v1, v1 422; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 423; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 424; GFX7-NEXT: s_setpc_b64 s[30:31] 425; 426; GFX8-LABEL: v_roundeven_v2f16: 427; GFX8: ; %bb.0: 428; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 429; GFX8-NEXT: v_rndne_f16_e32 v1, v0 430; GFX8-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 431; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 432; GFX8-NEXT: s_setpc_b64 s[30:31] 433; 434; GFX9-LABEL: v_roundeven_v2f16: 435; GFX9: ; %bb.0: 436; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 437; GFX9-NEXT: v_rndne_f16_e32 v1, v0 438; GFX9-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 439; GFX9-NEXT: v_pack_b32_f16 v0, v1, v0 440; GFX9-NEXT: s_setpc_b64 s[30:31] 441; 442; GFX10-LABEL: v_roundeven_v2f16: 443; GFX10: ; %bb.0: 444; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 445; GFX10-NEXT: v_rndne_f16_e32 v1, v0 446; GFX10-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 447; GFX10-NEXT: v_pack_b32_f16 v0, v1, v0 448; GFX10-NEXT: s_setpc_b64 s[30:31] 449; 450; GFX11-LABEL: v_roundeven_v2f16: 451; GFX11: ; %bb.0: 452; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 453; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 454; GFX11-NEXT: v_rndne_f16_e32 v0, v0 455; GFX11-NEXT: v_rndne_f16_e32 v1, v1 456; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1 457; GFX11-NEXT: s_setpc_b64 s[30:31] 458; 459; SDAG_GFX6-LABEL: v_roundeven_v2f16: 460; SDAG_GFX6: ; %bb.0: 461; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 462; SDAG_GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 463; SDAG_GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 464; SDAG_GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 465; SDAG_GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 466; SDAG_GFX6-NEXT: v_rndne_f32_e32 v0, v0 467; SDAG_GFX6-NEXT: 
v_rndne_f32_e32 v1, v1 468; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31] 469; 470; SDAG_GFX7-LABEL: v_roundeven_v2f16: 471; SDAG_GFX7: ; %bb.0: 472; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 473; SDAG_GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 474; SDAG_GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 475; SDAG_GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 476; SDAG_GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 477; SDAG_GFX7-NEXT: v_rndne_f32_e32 v0, v0 478; SDAG_GFX7-NEXT: v_rndne_f32_e32 v1, v1 479; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31] 480; 481; SDAG_GFX8-LABEL: v_roundeven_v2f16: 482; SDAG_GFX8: ; %bb.0: 483; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 484; SDAG_GFX8-NEXT: v_rndne_f16_sdwa v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 485; SDAG_GFX8-NEXT: v_rndne_f16_e32 v0, v0 486; SDAG_GFX8-NEXT: v_or_b32_e32 v0, v0, v1 487; SDAG_GFX8-NEXT: s_setpc_b64 s[30:31] 488; 489; SDAG_GFX9-LABEL: v_roundeven_v2f16: 490; SDAG_GFX9: ; %bb.0: 491; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 492; SDAG_GFX9-NEXT: v_rndne_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 493; SDAG_GFX9-NEXT: v_rndne_f16_e32 v0, v0 494; SDAG_GFX9-NEXT: v_pack_b32_f16 v0, v0, v1 495; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31] 496; 497; SDAG_GFX10-LABEL: v_roundeven_v2f16: 498; SDAG_GFX10: ; %bb.0: 499; SDAG_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 500; SDAG_GFX10-NEXT: v_rndne_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 501; SDAG_GFX10-NEXT: v_rndne_f16_e32 v0, v0 502; SDAG_GFX10-NEXT: v_pack_b32_f16 v0, v0, v1 503; SDAG_GFX10-NEXT: s_setpc_b64 s[30:31] 504; 505; SDAG_GFX11-LABEL: v_roundeven_v2f16: 506; SDAG_GFX11: ; %bb.0: 507; SDAG_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 508; SDAG_GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 509; SDAG_GFX11-NEXT: v_rndne_f16_e32 v0, v0 510; SDAG_GFX11-NEXT: v_rndne_f16_e32 v1, v1 511; SDAG_GFX11-NEXT: v_pack_b32_f16 v0, v0, v1 512; SDAG_GFX11-NEXT: s_setpc_b64 s[30:31] 513 %roundeven = call <2 x 
half> @llvm.roundeven.v2f16(<2 x half> %x) 514 ret <2 x half> %roundeven 515} 516 517define <2 x half> @v_roundeven_v2f16_fneg(<2 x half> %x) { 518; GFX6-LABEL: v_roundeven_v2f16_fneg: 519; GFX6: ; %bb.0: 520; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 521; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 522; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 523; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 524; GFX6-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 525; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v0 526; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0 527; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v0 528; GFX6-NEXT: v_rndne_f32_e32 v0, v1 529; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 530; GFX6-NEXT: v_rndne_f32_e32 v1, v2 531; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 532; GFX6-NEXT: s_setpc_b64 s[30:31] 533; 534; GFX7-LABEL: v_roundeven_v2f16_fneg: 535; GFX7: ; %bb.0: 536; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 537; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 538; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 539; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 540; GFX7-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 541; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v0 542; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 543; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v0 544; GFX7-NEXT: v_rndne_f32_e32 v0, v1 545; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 546; GFX7-NEXT: v_rndne_f32_e32 v1, v2 547; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 548; GFX7-NEXT: s_setpc_b64 s[30:31] 549; 550; GFX8-LABEL: v_roundeven_v2f16_fneg: 551; GFX8: ; %bb.0: 552; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 553; GFX8-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 554; GFX8-NEXT: v_rndne_f16_e32 v1, v0 555; GFX8-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 556; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 557; GFX8-NEXT: s_setpc_b64 s[30:31] 558; 559; GFX9-LABEL: v_roundeven_v2f16_fneg: 560; GFX9: ; %bb.0: 561; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 562; GFX9-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 563; GFX9-NEXT: v_rndne_f16_e32 v1, v0 564; GFX9-NEXT: 
v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 565; GFX9-NEXT: v_pack_b32_f16 v0, v1, v0 566; GFX9-NEXT: s_setpc_b64 s[30:31] 567; 568; GFX10-LABEL: v_roundeven_v2f16_fneg: 569; GFX10: ; %bb.0: 570; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 571; GFX10-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 572; GFX10-NEXT: v_rndne_f16_e32 v1, v0 573; GFX10-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 574; GFX10-NEXT: v_pack_b32_f16 v0, v1, v0 575; GFX10-NEXT: s_setpc_b64 s[30:31] 576; 577; GFX11-LABEL: v_roundeven_v2f16_fneg: 578; GFX11: ; %bb.0: 579; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 580; GFX11-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 581; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 582; GFX11-NEXT: v_rndne_f16_e32 v0, v0 583; GFX11-NEXT: v_rndne_f16_e32 v1, v1 584; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1 585; GFX11-NEXT: s_setpc_b64 s[30:31] 586; 587; SDAG_GFX6-LABEL: v_roundeven_v2f16_fneg: 588; SDAG_GFX6: ; %bb.0: 589; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 590; SDAG_GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 591; SDAG_GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 592; SDAG_GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 593; SDAG_GFX6-NEXT: v_or_b32_e32 v0, v0, v1 594; SDAG_GFX6-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 595; SDAG_GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0 596; SDAG_GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 597; SDAG_GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 598; SDAG_GFX6-NEXT: v_rndne_f32_e32 v0, v0 599; SDAG_GFX6-NEXT: v_rndne_f32_e32 v1, v1 600; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31] 601; 602; SDAG_GFX7-LABEL: v_roundeven_v2f16_fneg: 603; SDAG_GFX7: ; %bb.0: 604; SDAG_GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 605; SDAG_GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 606; SDAG_GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 607; SDAG_GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 608; SDAG_GFX7-NEXT: v_or_b32_e32 v0, v0, v1 609; SDAG_GFX7-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 610; SDAG_GFX7-NEXT: v_lshrrev_b32_e32 v1, 
16, v0 611; SDAG_GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 612; SDAG_GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 613; SDAG_GFX7-NEXT: v_rndne_f32_e32 v0, v0 614; SDAG_GFX7-NEXT: v_rndne_f32_e32 v1, v1 615; SDAG_GFX7-NEXT: s_setpc_b64 s[30:31] 616; 617; SDAG_GFX8-LABEL: v_roundeven_v2f16_fneg: 618; SDAG_GFX8: ; %bb.0: 619; SDAG_GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 620; SDAG_GFX8-NEXT: v_rndne_f16_sdwa v1, -v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 621; SDAG_GFX8-NEXT: v_rndne_f16_e64 v0, -v0 622; SDAG_GFX8-NEXT: v_or_b32_e32 v0, v0, v1 623; SDAG_GFX8-NEXT: s_setpc_b64 s[30:31] 624; 625; SDAG_GFX9-LABEL: v_roundeven_v2f16_fneg: 626; SDAG_GFX9: ; %bb.0: 627; SDAG_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 628; SDAG_GFX9-NEXT: v_rndne_f16_sdwa v1, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 629; SDAG_GFX9-NEXT: v_rndne_f16_e64 v0, -v0 630; SDAG_GFX9-NEXT: v_pack_b32_f16 v0, v0, v1 631; SDAG_GFX9-NEXT: s_setpc_b64 s[30:31] 632; 633; SDAG_GFX10-LABEL: v_roundeven_v2f16_fneg: 634; SDAG_GFX10: ; %bb.0: 635; SDAG_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 636; SDAG_GFX10-NEXT: v_rndne_f16_sdwa v1, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 637; SDAG_GFX10-NEXT: v_rndne_f16_e64 v0, -v0 638; SDAG_GFX10-NEXT: v_pack_b32_f16 v0, v0, v1 639; SDAG_GFX10-NEXT: s_setpc_b64 s[30:31] 640; 641; SDAG_GFX11-LABEL: v_roundeven_v2f16_fneg: 642; SDAG_GFX11: ; %bb.0: 643; SDAG_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 644; SDAG_GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 645; SDAG_GFX11-NEXT: v_rndne_f16_e64 v0, -v0 646; SDAG_GFX11-NEXT: v_rndne_f16_e64 v1, -v1 647; SDAG_GFX11-NEXT: v_pack_b32_f16 v0, v0, v1 648; SDAG_GFX11-NEXT: s_setpc_b64 s[30:31] 649 %x.fneg = fneg <2 x half> %x 650 %roundeven = call <2 x half> @llvm.roundeven.v2f16(<2 x half> %x.fneg) 651 ret <2 x half> %roundeven 652} 653 654define <4 x half> @v_roundeven_v4f16(<4 x half> %x) { 655; GFX6-LABEL: v_roundeven_v4f16: 656; GFX6: ; %bb.0: 657; GFX6-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 658; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 659; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 660; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 661; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 662; GFX6-NEXT: v_rndne_f32_e32 v0, v0 663; GFX6-NEXT: v_rndne_f32_e32 v1, v1 664; GFX6-NEXT: v_rndne_f32_e32 v2, v2 665; GFX6-NEXT: v_rndne_f32_e32 v3, v3 666; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 667; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 668; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2 669; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3 670; GFX6-NEXT: s_setpc_b64 s[30:31] 671; 672; GFX7-LABEL: v_roundeven_v4f16: 673; GFX7: ; %bb.0: 674; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 675; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 676; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 677; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 678; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 679; GFX7-NEXT: v_rndne_f32_e32 v0, v0 680; GFX7-NEXT: v_rndne_f32_e32 v1, v1 681; GFX7-NEXT: v_rndne_f32_e32 v2, v2 682; GFX7-NEXT: v_rndne_f32_e32 v3, v3 683; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 684; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 685; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2 686; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 687; GFX7-NEXT: s_setpc_b64 s[30:31] 688; 689; GFX8-LABEL: v_roundeven_v4f16: 690; GFX8: ; %bb.0: 691; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 692; GFX8-NEXT: v_rndne_f16_e32 v2, v0 693; GFX8-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 694; GFX8-NEXT: v_rndne_f16_e32 v3, v1 695; GFX8-NEXT: v_rndne_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 696; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 697; GFX8-NEXT: v_or_b32_e32 v1, v3, v1 698; GFX8-NEXT: s_setpc_b64 s[30:31] 699; 700; GFX9-LABEL: v_roundeven_v4f16: 701; GFX9: ; %bb.0: 702; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 703; GFX9-NEXT: v_rndne_f16_e32 v2, v0 704; GFX9-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 705; GFX9-NEXT: v_rndne_f16_e32 v3, v1 706; GFX9-NEXT: 
v_rndne_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 707; GFX9-NEXT: v_pack_b32_f16 v0, v2, v0 708; GFX9-NEXT: v_pack_b32_f16 v1, v3, v1 709; GFX9-NEXT: s_setpc_b64 s[30:31] 710; 711; GFX10-LABEL: v_roundeven_v4f16: 712; GFX10: ; %bb.0: 713; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 714; GFX10-NEXT: v_rndne_f16_e32 v2, v0 715; GFX10-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 716; GFX10-NEXT: v_rndne_f16_e32 v3, v1 717; GFX10-NEXT: v_rndne_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 718; GFX10-NEXT: v_pack_b32_f16 v0, v2, v0 719; GFX10-NEXT: v_pack_b32_f16 v1, v3, v1 720; GFX10-NEXT: s_setpc_b64 s[30:31] 721; 722; GFX11-LABEL: v_roundeven_v4f16: 723; GFX11: ; %bb.0: 724; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 725; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0 726; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1 727; GFX11-NEXT: v_rndne_f16_e32 v0, v0 728; GFX11-NEXT: v_rndne_f16_e32 v1, v1 729; GFX11-NEXT: v_rndne_f16_e32 v2, v2 730; GFX11-NEXT: v_rndne_f16_e32 v3, v3 731; GFX11-NEXT: v_pack_b32_f16 v0, v0, v2 732; GFX11-NEXT: v_pack_b32_f16 v1, v1, v3 733; GFX11-NEXT: s_setpc_b64 s[30:31] 734; 735; SDAG_GFX6-LABEL: v_roundeven_v4f16: 736; SDAG_GFX6: ; %bb.0: 737; SDAG_GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 738; SDAG_GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 739; SDAG_GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 740; SDAG_GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2 741; SDAG_GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3 742; SDAG_GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 743; SDAG_GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 744; SDAG_GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 745; SDAG_GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 746; SDAG_GFX6-NEXT: v_rndne_f32_e32 v0, v0 747; SDAG_GFX6-NEXT: v_rndne_f32_e32 v1, v1 748; SDAG_GFX6-NEXT: v_rndne_f32_e32 v2, v2 749; SDAG_GFX6-NEXT: v_rndne_f32_e32 v3, v3 750; SDAG_GFX6-NEXT: s_setpc_b64 s[30:31] 751; 752; SDAG_GFX7-LABEL: v_roundeven_v4f16: 753; SDAG_GFX7: ; %bb.0: 754; 
SDAG_GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG_GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SDAG_GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SDAG_GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
; SDAG_GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
; SDAG_GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SDAG_GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SDAG_GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
; SDAG_GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
; SDAG_GFX7-NEXT:    v_rndne_f32_e32 v0, v0
; SDAG_GFX7-NEXT:    v_rndne_f32_e32 v1, v1
; SDAG_GFX7-NEXT:    v_rndne_f32_e32 v2, v2
; SDAG_GFX7-NEXT:    v_rndne_f32_e32 v3, v3
; SDAG_GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG_GFX8-LABEL: v_roundeven_v4f16:
; SDAG_GFX8:       ; %bb.0:
; SDAG_GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG_GFX8-NEXT:    v_rndne_f16_sdwa v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; SDAG_GFX8-NEXT:    v_rndne_f16_sdwa v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; SDAG_GFX8-NEXT:    v_rndne_f16_e32 v1, v1
; SDAG_GFX8-NEXT:    v_rndne_f16_e32 v0, v0
; SDAG_GFX8-NEXT:    v_or_b32_e32 v0, v0, v3
; SDAG_GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
; SDAG_GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG_GFX9-LABEL: v_roundeven_v4f16:
; SDAG_GFX9:       ; %bb.0:
; SDAG_GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG_GFX9-NEXT:    v_rndne_f16_sdwa v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; SDAG_GFX9-NEXT:    v_rndne_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; SDAG_GFX9-NEXT:    v_rndne_f16_e32 v1, v1
; SDAG_GFX9-NEXT:    v_rndne_f16_e32 v0, v0
; SDAG_GFX9-NEXT:    v_pack_b32_f16 v0, v0, v3
; SDAG_GFX9-NEXT:    v_pack_b32_f16 v1, v1, v2
; SDAG_GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG_GFX10-LABEL: v_roundeven_v4f16:
; SDAG_GFX10:       ; %bb.0:
; SDAG_GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG_GFX10-NEXT:    v_rndne_f16_sdwa v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; SDAG_GFX10-NEXT:    v_rndne_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; SDAG_GFX10-NEXT:    v_rndne_f16_e32 v0, v0
; SDAG_GFX10-NEXT:    v_rndne_f16_e32 v1, v1
; SDAG_GFX10-NEXT:    v_pack_b32_f16 v0, v0, v3
; SDAG_GFX10-NEXT:    v_pack_b32_f16 v1, v1, v2
; SDAG_GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG_GFX11-LABEL: v_roundeven_v4f16:
; SDAG_GFX11:       ; %bb.0:
; SDAG_GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG_GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
; SDAG_GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
; SDAG_GFX11-NEXT:    v_rndne_f16_e32 v1, v1
; SDAG_GFX11-NEXT:    v_rndne_f16_e32 v0, v0
; SDAG_GFX11-NEXT:    v_rndne_f16_e32 v2, v2
; SDAG_GFX11-NEXT:    v_rndne_f16_e32 v3, v3
; SDAG_GFX11-NEXT:    v_pack_b32_f16 v0, v0, v2
; SDAG_GFX11-NEXT:    v_pack_b32_f16 v1, v1, v3
; SDAG_GFX11-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %x)
  ret <4 x half> %roundeven
}

; roundeven(fabs(x)) on f32: every RUN configuration is expected to fold the
; fabs into the |v0| source modifier of a single v_rndne_f32 (the _e64 form).
define float @v_roundeven_f32_fabs(float %x) {
; GFX6-LABEL: v_roundeven_f32_fabs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_rndne_f32_e64 v0, |v0|
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f32_fabs:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f32_e64 v0, |v0|
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f32_fabs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f32_e64 v0, |v0|
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f32_fabs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f32_e64 v0, |v0|
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_roundeven_f32_fabs:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_rndne_f32_e64 v0, |v0|
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG_GFX6-LABEL: v_roundeven_f32_fabs:
; SDAG_GFX6:       ; %bb.0:
; SDAG_GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG_GFX6-NEXT:    v_rndne_f32_e64 v0, |v0|
; SDAG_GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG_GFX7-LABEL: v_roundeven_f32_fabs:
; SDAG_GFX7:       ; %bb.0:
; SDAG_GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG_GFX7-NEXT:    v_rndne_f32_e64 v0, |v0|
; SDAG_GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG_GFX8-LABEL: v_roundeven_f32_fabs:
; SDAG_GFX8:       ; %bb.0:
; SDAG_GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG_GFX8-NEXT:    v_rndne_f32_e64 v0, |v0|
; SDAG_GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG_GFX9-LABEL: v_roundeven_f32_fabs:
; SDAG_GFX9:       ; %bb.0:
; SDAG_GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG_GFX9-NEXT:    v_rndne_f32_e64 v0, |v0|
; SDAG_GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG_GFX10PLUS-LABEL: v_roundeven_f32_fabs:
; SDAG_GFX10PLUS:       ; %bb.0:
; SDAG_GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG_GFX10PLUS-NEXT:    v_rndne_f32_e64 v0, |v0|
; SDAG_GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %roundeven = call float @llvm.roundeven.f32(float %fabs.x)
  ret float %roundeven
}

; f32 roundeven of an inreg argument in an amdgpu_ps function: the checks
; expect v_rndne_f32 to read the operand directly from s0, with no waitcnt or
; s_setpc return sequence.
define amdgpu_ps float @s_roundeven_f32(float inreg %x) {
; GFX6-LABEL: s_roundeven_f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_rndne_f32_e32 v0, s0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: s_roundeven_f32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_rndne_f32_e32 v0, s0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_roundeven_f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_rndne_f32_e32 v0, s0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: s_roundeven_f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_rndne_f32_e32 v0, s0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_roundeven_f32:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    v_rndne_f32_e32 v0, s0
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; SDAG_GFX6-LABEL: s_roundeven_f32:
; SDAG_GFX6:       ; %bb.0:
; SDAG_GFX6-NEXT:    v_rndne_f32_e32 v0, s0
; SDAG_GFX6-NEXT:    ; return to shader part epilog
;
; SDAG_GFX7-LABEL: s_roundeven_f32:
; SDAG_GFX7:       ; %bb.0:
; SDAG_GFX7-NEXT:    v_rndne_f32_e32 v0, s0
; SDAG_GFX7-NEXT:    ; return to shader part epilog
;
; SDAG_GFX8-LABEL: s_roundeven_f32:
; SDAG_GFX8:       ; %bb.0:
; SDAG_GFX8-NEXT:    v_rndne_f32_e32 v0, s0
; SDAG_GFX8-NEXT:    ; return to shader part epilog
;
; SDAG_GFX9-LABEL: s_roundeven_f32:
; SDAG_GFX9:       ; %bb.0:
; SDAG_GFX9-NEXT:    v_rndne_f32_e32 v0, s0
; SDAG_GFX9-NEXT:    ; return to shader part epilog
;
; SDAG_GFX10PLUS-LABEL: s_roundeven_f32:
; SDAG_GFX10PLUS:       ; %bb.0:
; SDAG_GFX10PLUS-NEXT:    v_rndne_f32_e32 v0, s0
; SDAG_GFX10PLUS-NEXT:    ; return to shader part epilog
  %roundeven = call float @llvm.roundeven.f32(float %x)
  ret float %roundeven
}

; roundeven(fneg(x)) on f32: every RUN configuration is expected to fold the
; negation into the -v0 source modifier of a single v_rndne_f32 (the _e64 form).
define float @v_roundeven_f32_fneg(float %x) {
; GFX6-LABEL: v_roundeven_f32_fneg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_rndne_f32_e64 v0, -v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f32_fneg:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f32_e64 v0, -v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f32_fneg:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f32_e64 v0, -v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f32_fneg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f32_e64 v0, -v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_roundeven_f32_fneg:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_rndne_f32_e64 v0, -v0
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG_GFX6-LABEL: v_roundeven_f32_fneg:
; SDAG_GFX6:       ; %bb.0:
; SDAG_GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG_GFX6-NEXT:    v_rndne_f32_e64 v0, -v0
; SDAG_GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG_GFX7-LABEL: v_roundeven_f32_fneg:
; SDAG_GFX7:       ; %bb.0:
; SDAG_GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG_GFX7-NEXT:    v_rndne_f32_e64 v0, -v0
; SDAG_GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG_GFX8-LABEL: v_roundeven_f32_fneg:
; SDAG_GFX8:       ; %bb.0:
; SDAG_GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG_GFX8-NEXT:    v_rndne_f32_e64 v0, -v0
; SDAG_GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG_GFX9-LABEL: v_roundeven_f32_fneg:
; SDAG_GFX9:       ; %bb.0:
; SDAG_GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG_GFX9-NEXT:    v_rndne_f32_e64 v0, -v0
; SDAG_GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG_GFX10PLUS-LABEL: v_roundeven_f32_fneg:
; SDAG_GFX10PLUS:       ; %bb.0:
; SDAG_GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG_GFX10PLUS-NEXT:    v_rndne_f32_e64 v0, -v0
; SDAG_GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %neg.x = fneg float %x
  %roundeven = call float @llvm.roundeven.f32(float %neg.x)
  ret float %roundeven
}

; f64 roundeven. The tahiti checks expect an expanded sequence: add and then
; subtract a constant whose high dword is 0x43300000 (2^52) carrying the sign
; of the input, and keep the original input when |x| compares greater than
; 0x432fffffffffffff. All other -mcpu values expect a single v_rndne_f64.
define double @v_roundeven_f64(double %x) {
; GFX6-LABEL: v_roundeven_f64:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_and_b32_e32 v3, 0x80000000, v1
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    v_or_b32_e32 v3, 0x43300000, v3
; GFX6-NEXT:    v_add_f64 v[4:5], v[0:1], v[2:3]
; GFX6-NEXT:    v_mov_b32_e32 v6, -1
; GFX6-NEXT:    v_mov_b32_e32 v7, 0x432fffff
; GFX6-NEXT:    v_add_f64 v[2:3], v[4:5], -v[2:3]
; GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[0:1]|, v[6:7]
; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f64:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f64:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_roundeven_f64:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG_GFX6-LABEL: v_roundeven_f64:
; SDAG_GFX6:       ; %bb.0:
; SDAG_GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG_GFX6-NEXT:    s_brev_b32 s6, -2
; SDAG_GFX6-NEXT:    v_mov_b32_e32 v2, 0x43300000
; SDAG_GFX6-NEXT:    v_bfi_b32 v3, s6, v2, v1
; SDAG_GFX6-NEXT:    v_mov_b32_e32 v2, 0
; SDAG_GFX6-NEXT:    s_mov_b32 s4, -1
; SDAG_GFX6-NEXT:    v_add_f64 v[4:5], v[0:1], v[2:3]
; SDAG_GFX6-NEXT:    s_mov_b32 s5, 0x432fffff
; SDAG_GFX6-NEXT:    v_add_f64 v[2:3], v[4:5], -v[2:3]
; SDAG_GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
; SDAG_GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; SDAG_GFX6-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; SDAG_GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG_GFX7-LABEL: v_roundeven_f64:
; SDAG_GFX7:       ; %bb.0:
; SDAG_GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG_GFX7-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; SDAG_GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG_GFX8-LABEL: v_roundeven_f64:
; SDAG_GFX8:       ; %bb.0:
; SDAG_GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG_GFX8-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; SDAG_GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG_GFX9-LABEL: v_roundeven_f64:
; SDAG_GFX9:       ; %bb.0:
; SDAG_GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG_GFX9-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; SDAG_GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG_GFX10PLUS-LABEL: v_roundeven_f64:
; SDAG_GFX10PLUS:       ; %bb.0:
; SDAG_GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG_GFX10PLUS-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; SDAG_GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call double @llvm.roundeven.f64(double %x)
  ret double %roundeven
}

; roundeven(fneg(x)) on f64. The tahiti checks expect the same expanded
; sequence as the plain f64 case, with the sign flip materialised by a v_xor on
; the high dword and a -v[0:1] modifier on the first add. All other -mcpu
; values expect the negation folded into the v_rndne_f64 source modifier.
define double @v_roundeven_f64_fneg(double %x) {
; GFX6-LABEL: v_roundeven_f64_fneg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_xor_b32_e32 v8, 0x80000000, v1
; GFX6-NEXT:    v_and_b32_e32 v3, 0x80000000, v8
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    v_or_b32_e32 v3, 0x43300000, v3
; GFX6-NEXT:    v_add_f64 v[4:5], -v[0:1], v[2:3]
; GFX6-NEXT:    v_mov_b32_e32 v6, -1
; GFX6-NEXT:    v_mov_b32_e32 v7, 0x432fffff
; GFX6-NEXT:    v_add_f64 v[2:3], v[4:5], -v[2:3]
; GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[0:1]|, v[6:7]
; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-NEXT:    v_cndmask_b32_e32 v1, v3, v8, vcc
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f64_fneg:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f64_fneg:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f64_fneg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_roundeven_f64_fneg:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG_GFX6-LABEL: v_roundeven_f64_fneg:
; SDAG_GFX6:       ; %bb.0:
; SDAG_GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG_GFX6-NEXT:    v_xor_b32_e32 v6, 0x80000000, v1
; SDAG_GFX6-NEXT:    s_brev_b32 s4, -2
; SDAG_GFX6-NEXT:    v_mov_b32_e32 v2, 0x43300000
; SDAG_GFX6-NEXT:    v_bfi_b32 v3, s4, v2, v6
; SDAG_GFX6-NEXT:    v_mov_b32_e32 v2, 0
; SDAG_GFX6-NEXT:    v_add_f64 v[4:5], -v[0:1], v[2:3]
; SDAG_GFX6-NEXT:    s_mov_b32 s4, -1
; SDAG_GFX6-NEXT:    s_mov_b32 s5, 0x432fffff
; SDAG_GFX6-NEXT:    v_add_f64 v[2:3], v[4:5], -v[2:3]
; SDAG_GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
; SDAG_GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; SDAG_GFX6-NEXT:    v_cndmask_b32_e32 v1, v3, v6, vcc
; SDAG_GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG_GFX7-LABEL: v_roundeven_f64_fneg:
; SDAG_GFX7:       ; %bb.0:
; SDAG_GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG_GFX7-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
; SDAG_GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG_GFX8-LABEL: v_roundeven_f64_fneg:
; SDAG_GFX8:       ; %bb.0:
; SDAG_GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG_GFX8-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
; SDAG_GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG_GFX9-LABEL: v_roundeven_f64_fneg:
; SDAG_GFX9:       ; %bb.0:
; SDAG_GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG_GFX9-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
; SDAG_GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG_GFX10PLUS-LABEL: v_roundeven_f64_fneg:
; SDAG_GFX10PLUS:       ; %bb.0:
; SDAG_GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG_GFX10PLUS-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
; SDAG_GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %neg.x = fneg double %x
  %roundeven = call double @llvm.roundeven.f64(double %neg.x)
  ret double %roundeven
}

; <2 x double> roundeven: the checks expect one scalar f64 lowering per
; element (the expanded sequence twice for tahiti, two v_rndne_f64 otherwise).
define <2 x double> @v_roundeven_v2f64(<2 x double> %x) {
; GFX6-LABEL: v_roundeven_v2f64:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_and_b32_e32 v5, 0x80000000, v1
; GFX6-NEXT:    v_mov_b32_e32 v4, 0
; GFX6-NEXT:    v_or_b32_e32 v5, 0x43300000, v5
; GFX6-NEXT:    v_add_f64 v[6:7], v[0:1], v[4:5]
; GFX6-NEXT:    s_mov_b32 s4, -1
; GFX6-NEXT:    s_mov_b32 s5, 0x432fffff
; GFX6-NEXT:    v_add_f64 v[5:6], v[6:7], -v[4:5]
; GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
; GFX6-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
; GFX6-NEXT:    v_and_b32_e32 v5, 0x80000000, v3
; GFX6-NEXT:    v_or_b32_e32 v5, 0x43300000, v5
; GFX6-NEXT:    v_add_f64 v[7:8], v[2:3], v[4:5]
; GFX6-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
; GFX6-NEXT:    v_add_f64 v[4:5], v[7:8], -v[4:5]
; GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[2:3]|, s[4:5]
; GFX6-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX6-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v2f64:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX7-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v2f64:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX8-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v2f64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX9-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_roundeven_v2f64:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX10PLUS-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG_GFX6-LABEL: v_roundeven_v2f64:
; SDAG_GFX6:       ; %bb.0:
; SDAG_GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG_GFX6-NEXT:    s_brev_b32 s6, -2
; SDAG_GFX6-NEXT:    v_mov_b32_e32 v8, 0x43300000
; SDAG_GFX6-NEXT:    v_bfi_b32 v5, s6, v8, v1
; SDAG_GFX6-NEXT:    v_mov_b32_e32 v4, 0
; SDAG_GFX6-NEXT:    v_add_f64 v[6:7], v[0:1], v[4:5]
; SDAG_GFX6-NEXT:    s_mov_b32 s4, -1
; SDAG_GFX6-NEXT:    s_mov_b32 s5, 0x432fffff
; SDAG_GFX6-NEXT:    v_add_f64 v[5:6], v[6:7], -v[4:5]
; SDAG_GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
; SDAG_GFX6-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
; SDAG_GFX6-NEXT:    v_bfi_b32 v5, s6, v8, v3
; SDAG_GFX6-NEXT:    v_add_f64 v[7:8], v[2:3], v[4:5]
; SDAG_GFX6-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
; SDAG_GFX6-NEXT:    v_add_f64 v[4:5], v[7:8], -v[4:5]
; SDAG_GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[2:3]|, s[4:5]
; SDAG_GFX6-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
; SDAG_GFX6-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
; SDAG_GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG_GFX7-LABEL: v_roundeven_v2f64:
; SDAG_GFX7:       ; %bb.0:
; SDAG_GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG_GFX7-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; SDAG_GFX7-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
; SDAG_GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG_GFX8-LABEL: v_roundeven_v2f64:
; SDAG_GFX8:       ; %bb.0:
; SDAG_GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG_GFX8-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; SDAG_GFX8-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
; SDAG_GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG_GFX9-LABEL: v_roundeven_v2f64:
; SDAG_GFX9:       ; %bb.0:
; SDAG_GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG_GFX9-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; SDAG_GFX9-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
; SDAG_GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG_GFX10PLUS-LABEL: v_roundeven_v2f64:
; SDAG_GFX10PLUS:       ; %bb.0:
; SDAG_GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG_GFX10PLUS-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; SDAG_GFX10PLUS-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
; SDAG_GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %x)
  ret <2 x double> %roundeven
}

; Intrinsic declarations used by the tests in this file.
declare half @llvm.roundeven.f16(half) #0
declare <2 x half> @llvm.roundeven.v2f16(<2 x half>) #0
declare <4 x half> @llvm.roundeven.v4f16(<4 x half>) #0

declare float @llvm.roundeven.f32(float) #0
declare <2 x float> @llvm.roundeven.v2f32(<2 x float>) #0
declare <3 x float> @llvm.roundeven.v3f32(<3 x float>) #0
declare <4 x float> @llvm.roundeven.v4f32(<4 x float>) #0

declare double @llvm.roundeven.f64(double) #0
declare <2 x double> @llvm.roundeven.v2f64(<2 x double>) #0

declare half @llvm.fabs.f16(half) #0
declare float @llvm.fabs.f32(float) #0

attributes #0 = { nounwind readnone speculatable willreturn }