; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX8 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9ALL,GFX900 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9ALL,GFX906 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s

; Codegen checks for 16-bit integer and half operations on gfx803, gfx900,
; gfx906, gfx1010 and gfx1100. Each operation is tested twice: once returning
; the 16-bit value directly, and once with the result zero-extended to i32.
; In the zero-extended variants the expected output differs per target in
; whether an explicit "v_and_b32 0xffff" follows the 16-bit instruction.
; The check lines are generated; regenerate them with the script named in the
; first line instead of editing them by hand.

; Plain i16 shift left; the i16 result is returned as-is.
define i16 @shl_i16(i16 %x, i16 %y) {
; GFX8-LABEL: shl_i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v1, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: shl_i16:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_lshlrev_b16_e32 v0, v1, v0
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: shl_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_lshlrev_b16 v0, v1, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: shl_i16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_lshlrev_b16 v0, v1, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = shl i16 %x, %y
  ret i16 %res
}

; Plain i16 logical shift right.
define i16 @lshr_i16(i16 %x, i16 %y) {
; GFX8-LABEL: lshr_i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b16_e32 v0, v1, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: lshr_i16:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_lshrrev_b16_e32 v0, v1, v0
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: lshr_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_lshrrev_b16 v0, v1, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: lshr_i16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_lshrrev_b16 v0, v1, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = lshr i16 %x, %y
  ret i16 %res
}

; Plain i16 arithmetic shift right.
define i16 @ashr_i16(i16 %x, i16 %y) {
; GFX8-LABEL: ashr_i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_ashrrev_i16_e32 v0, v1, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: ashr_i16:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_ashrrev_i16_e32 v0, v1, v0
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: ashr_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_ashrrev_i16 v0, v1, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: ashr_i16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_ashrrev_i16 v0, v1, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = ashr i16 %x, %y
  ret i16 %res
}

; Plain i16 add.
define i16 @add_u16(i16 %x, i16 %y) {
; GFX8-LABEL: add_u16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: add_u16:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_add_u16_e32 v0, v0, v1
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: add_u16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_nc_u16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: add_u16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_add_nc_u16 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = add i16 %x, %y
  ret i16 %res
}

; Plain i16 subtract.
define i16 @sub_u16(i16 %x, i16 %y) {
; GFX8-LABEL: sub_u16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_sub_u16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: sub_u16:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_sub_u16_e32 v0, v0, v1
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: sub_u16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_sub_nc_u16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: sub_u16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_sub_nc_u16 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = sub i16 %x, %y
  ret i16 %res
}

; Plain i16 multiply.
define i16 @mul_lo_u16(i16 %x, i16 %y) {
; GFX8-LABEL: mul_lo_u16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mul_lo_u16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: mul_lo_u16:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_mul_lo_u16_e32 v0, v0, v1
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: mul_lo_u16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_mul_lo_u16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: mul_lo_u16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_lo_u16 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = mul i16 %x, %y
  ret i16 %res
}

; Unsigned i16 minimum written as icmp ule + select.
define i16 @min_u16(i16 %x, i16 %y) {
; GFX8-LABEL: min_u16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_min_u16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: min_u16:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_min_u16_e32 v0, v0, v1
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: min_u16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_min_u16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: min_u16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_min_u16 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %cmp = icmp ule i16 %x, %y
  %res = select i1 %cmp, i16 %x, i16 %y
  ret i16 %res
}

; Signed i16 minimum written as icmp sle + select.
define i16 @min_i16(i16 %x, i16 %y) {
; GFX8-LABEL: min_i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_min_i16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: min_i16:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_min_i16_e32 v0, v0, v1
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: min_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_min_i16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: min_i16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_min_i16 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %cmp = icmp sle i16 %x, %y
  %res = select i1 %cmp, i16 %x, i16 %y
  ret i16 %res
}

; Unsigned i16 maximum written as icmp uge + select.
define i16 @max_u16(i16 %x, i16 %y) {
; GFX8-LABEL: max_u16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_max_u16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: max_u16:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_max_u16_e32 v0, v0, v1
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: max_u16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_max_u16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: max_u16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_max_u16 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %cmp = icmp uge i16 %x, %y
  %res = select i1 %cmp, i16 %x, i16 %y
  ret i16 %res
}

; Signed i16 maximum written as icmp sge + select.
define i16 @max_i16(i16 %x, i16 %y) {
; GFX8-LABEL: max_i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_max_i16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: max_i16:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_max_i16_e32 v0, v0, v1
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: max_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_max_i16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: max_i16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_max_i16 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %cmp = icmp sge i16 %x, %y
  %res = select i1 %cmp, i16 %x, i16 %y
  ret i16 %res
}

; i16 shift left whose result is zero-extended to i32. Expected output: no
; mask on gfx803/gfx900/gfx906; v_and_b32 with 0xffff on gfx1010/gfx1100.
define i32 @shl_i16_zext_i32(i16 %x, i16 %y) {
; GFX8-LABEL: shl_i16_zext_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshlrev_b16_e32 v0, v1, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: shl_i16_zext_i32:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_lshlrev_b16_e32 v0, v1, v0
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: shl_i16_zext_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_lshlrev_b16 v0, v1, v0
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: shl_i16_zext_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_lshlrev_b16 v0, v1, v0
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = shl i16 %x, %y
  %zext = zext i16 %res to i32
  ret i32 %zext
}

; i16 logical shift right, zero-extended to i32.
define i32 @lshr_i16_zext_i32(i16 %x, i16 %y) {
; GFX8-LABEL: lshr_i16_zext_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b16_e32 v0, v1, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: lshr_i16_zext_i32:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_lshrrev_b16_e32 v0, v1, v0
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: lshr_i16_zext_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_lshrrev_b16 v0, v1, v0
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: lshr_i16_zext_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_lshrrev_b16 v0, v1, v0
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = lshr i16 %x, %y
  %zext = zext i16 %res to i32
  ret i32 %zext
}

; i16 arithmetic shift right, zero-extended to i32.
define i32 @ashr_i16_zext_i32(i16 %x, i16 %y) {
; GFX8-LABEL: ashr_i16_zext_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_ashrrev_i16_e32 v0, v1, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: ashr_i16_zext_i32:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_ashrrev_i16_e32 v0, v1, v0
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: ashr_i16_zext_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_ashrrev_i16 v0, v1, v0
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: ashr_i16_zext_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_ashrrev_i16 v0, v1, v0
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = ashr i16 %x, %y
  %zext = zext i16 %res to i32
  ret i32 %zext
}

; i16 add, zero-extended to i32.
define i32 @add_u16_zext_i32(i16 %x, i16 %y) {
; GFX8-LABEL: add_u16_zext_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: add_u16_zext_i32:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_add_u16_e32 v0, v0, v1
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: add_u16_zext_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_nc_u16 v0, v0, v1
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: add_u16_zext_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_add_nc_u16 v0, v0, v1
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = add i16 %x, %y
  %zext = zext i16 %res to i32
  ret i32 %zext
}

; i16 subtract, zero-extended to i32.
define i32 @sub_u16_zext_i32(i16 %x, i16 %y) {
; GFX8-LABEL: sub_u16_zext_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_sub_u16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: sub_u16_zext_i32:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_sub_u16_e32 v0, v0, v1
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: sub_u16_zext_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_sub_nc_u16 v0, v0, v1
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: sub_u16_zext_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_sub_nc_u16 v0, v0, v1
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = sub i16 %x, %y
  %zext = zext i16 %res to i32
  ret i32 %zext
}

; i16 multiply, zero-extended to i32.
define i32 @mul_lo_u16_zext_i32(i16 %x, i16 %y) {
; GFX8-LABEL: mul_lo_u16_zext_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mul_lo_u16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: mul_lo_u16_zext_i32:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_mul_lo_u16_e32 v0, v0, v1
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: mul_lo_u16_zext_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_mul_lo_u16 v0, v0, v1
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: mul_lo_u16_zext_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_lo_u16 v0, v0, v1
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = mul i16 %x, %y
  %zext = zext i16 %res to i32
  ret i32 %zext
}

; Unsigned i16 minimum (icmp ule + select), zero-extended to i32.
define i32 @min_u16_zext_i32(i16 %x, i16 %y) {
; GFX8-LABEL: min_u16_zext_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_min_u16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: min_u16_zext_i32:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_min_u16_e32 v0, v0, v1
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: min_u16_zext_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_min_u16 v0, v0, v1
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: min_u16_zext_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_min_u16 v0, v0, v1
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %cmp = icmp ule i16 %x, %y
  %res = select i1 %cmp, i16 %x, i16 %y
  %zext = zext i16 %res to i32
  ret i32 %zext
}

; Signed i16 minimum (icmp sle + select), zero-extended to i32.
define i32 @min_i16_zext_i32(i16 %x, i16 %y) {
; GFX8-LABEL: min_i16_zext_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_min_i16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: min_i16_zext_i32:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_min_i16_e32 v0, v0, v1
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: min_i16_zext_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_min_i16 v0, v0, v1
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: min_i16_zext_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_min_i16 v0, v0, v1
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %cmp = icmp sle i16 %x, %y
  %res = select i1 %cmp, i16 %x, i16 %y
  %zext = zext i16 %res to i32
  ret i32 %zext
}

; Unsigned i16 maximum (icmp uge + select), zero-extended to i32.
define i32 @max_u16_zext_i32(i16 %x, i16 %y) {
; GFX8-LABEL: max_u16_zext_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_max_u16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: max_u16_zext_i32:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_max_u16_e32 v0, v0, v1
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: max_u16_zext_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_max_u16 v0, v0, v1
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: max_u16_zext_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_max_u16 v0, v0, v1
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %cmp = icmp uge i16 %x, %y
  %res = select i1 %cmp, i16 %x, i16 %y
  %zext = zext i16 %res to i32
  ret i32 %zext
}

; Signed i16 maximum (icmp sge + select), zero-extended to i32.
define i32 @max_i16_zext_i32(i16 %x, i16 %y) {
; GFX8-LABEL: max_i16_zext_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_max_i16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: max_i16_zext_i32:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_max_i16_e32 v0, v0, v1
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: max_i16_zext_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_max_i16 v0, v0, v1
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: max_i16_zext_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_max_i16 v0, v0, v1
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %cmp = icmp sge i16 %x, %y
  %res = select i1 %cmp, i16 %x, i16 %y
  %zext = zext i16 %res to i32
  ret i32 %zext
}

; half fadd whose bits are reinterpreted as i16 and zero-extended to i32.
; Expected output: no mask on gfx803/gfx900/gfx906; v_and_b32 with 0xffff on
; gfx1010/gfx1100.
define i32 @zext_fadd_f16(half %x, half %y) {
; GFX8-LABEL: zext_fadd_f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_f16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: zext_fadd_f16:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_add_f16_e32 v0, v0, v1
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: zext_fadd_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_add_f16_e32 v0, v0, v1
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: zext_fadd_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_add_f16_e32 v0, v0, v1
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %add = fadd half %x, %y
  %cast = bitcast half %add to i16
  %zext = zext i16 %cast to i32
  ret i32 %zext
}

; llvm.fma.f16 result bitcast to i16 and zero-extended to i32. Unlike the
; fadd case, the gfx900/gfx906 checks here also expect the v_and_b32 mask;
; only gfx803 has none. gfx1010/gfx1100 select v_fmac_f16 into v2 and the
; mask moves the result into v0.
define i32 @zext_fma_f16(half %x, half %y, half %z) {
; GFX8-LABEL: zext_fma_f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_fma_f16 v0, v0, v1, v2
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: zext_fma_f16:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_fma_f16 v0, v0, v1, v2
; GFX9ALL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: zext_fma_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_fmac_f16_e32 v2, v0, v1
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: zext_fma_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_fmac_f16_e32 v2, v0, v1
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %fma = call half @llvm.fma.f16(half %x, half %y, half %z)
  %cast = bitcast half %fma to i16
  %zext = zext i16 %cast to i32
  ret i32 %zext
}

; llvm.amdgcn.div.fixup.f16 result bitcast to i16 and zero-extended to i32.
; As with the fma case, every target except gfx803 expects the v_and_b32 mask.
define i32 @zext_div_fixup_f16(half %x, half %y, half %z) {
; GFX8-LABEL: zext_div_fixup_f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_div_fixup_f16 v0, v0, v1, v2
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: zext_div_fixup_f16:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_div_fixup_f16 v0, v0, v1, v2
; GFX9ALL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: zext_div_fixup_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_div_fixup_f16 v0, v0, v1, v2
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: zext_div_fixup_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_div_fixup_f16 v0, v0, v1, v2
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %div.fixup = call half @llvm.amdgcn.div.fixup.f16(half %x, half %y, half %z)
  %cast = bitcast half %div.fixup to i16
  %zext = zext i16 %cast to i32
  ret i32 %zext
}

; We technically could eliminate the and on gfx9 here but we don't try
; to inspect the source of the fptrunc. We're only worried about cases
; that lower to v_fma_mix* instructions.
; NOTE(review): the gfx900/gfx906 checks of the function that follows show no
; v_and_b32 after v_cvt_f16_f32, which does not match the statement above that
; the and is kept on gfx9 - confirm whether this comment is stale or whether
; it was meant for a different function.
; fptrunc float to half, bitcast to i16, zero-extended to i32. Expected
; output: v_cvt_f16_f32 alone on gfx803/gfx900/gfx906; v_cvt_f16_f32 followed
; by v_and_b32 with 0xffff on gfx1010/gfx1100.
define i32 @zext_fptrunc_f16(float %x) {
; GFX8-LABEL: zext_fptrunc_f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9ALL-LABEL: zext_fptrunc_f16:
; GFX9ALL:       ; %bb.0:
; GFX9ALL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9ALL-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9ALL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: zext_fptrunc_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: zext_fptrunc_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %fptrunc = fptrunc float %x to half
  %cast = bitcast half %fptrunc to i16
  %zext = zext i16 %cast to i32
  ret i32 %zext
}

; fptrunc of an f32 llvm.fma result, bitcast to i16 and zero-extended to i32.
; This is the one function where gfx900 and gfx906 have separate check
; prefixes: gfx803 and gfx900 expect v_fma_f32 + v_cvt_f16_f32 with no mask,
; while gfx906, gfx1010 and gfx1100 expect v_fma_mixlo_f16 followed by
; v_and_b32 with 0xffff.
define i32 @zext_fptrunc_fma_f16(float %x, float %y, float %z) {
; GFX8-LABEL: zext_fptrunc_fma_f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_fma_f32 v0, v0, v1, v2
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX900-LABEL: zext_fptrunc_fma_f16:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_fma_f32 v0, v0, v1, v2
; GFX900-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX906-LABEL: zext_fptrunc_fma_f16:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_fma_mixlo_f16 v0, v0, v1, v2
; GFX906-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: zext_fptrunc_fma_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_fma_mixlo_f16 v0, v0, v1, v2
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: zext_fptrunc_fma_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_fma_mixlo_f16 v0, v0, v1, v2
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %fma = call float @llvm.fma.f32(float %x, float %y, float %z)
  %fptrunc = fptrunc float %fma to half
  %cast = bitcast half %fptrunc to i16
  %zext = zext i16 %cast to i32
  ret i32 %zext
}

; Intrinsics called by the half/float tests in this file.
declare half @llvm.amdgcn.div.fixup.f16(half, half, half)
declare half @llvm.fma.f16(half, half, half)
declare float @llvm.fma.f32(float, float, float)