; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX7 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx801 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s

define i32 @v_usubo_i32(i32 %a, i32 %b) {
; GFX7-LABEL: v_usubo_i32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
; GFX7-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_usubo_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_usubo_i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v1
; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX9-NEXT:    v_sub_u32_e32 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %usubo = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %sub = extractvalue {i32, i1} %usubo, 0
  %of = extractvalue {i32, i1} %usubo, 1
  %of.zext = zext i1 %of to i32
  %ret = sub i32 %sub, %of.zext
  ret i32 %ret
}

define i64 @v_usubo_i64(i64 %a, i64 %b) {
; GFX7-LABEL: v_usubo_i64:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
; GFX7-NEXT:    v_subb_u32_e32 v1, vcc, v1, v3, vcc
; GFX7-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
; GFX7-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_usubo_i64:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT:    v_subb_u32_e32 v1, vcc, v1, v3, vcc
; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_usubo_i64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT:    v_subbrev_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %usubo = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
  %sub = extractvalue {i64, i1} %usubo, 0
  %of = extractvalue {i64, i1} %usubo, 1
  %of.zext = zext i1 %of to i64
  %ret = sub i64 %sub, %of.zext
  ret i64 %ret
}

define i8 @v_usubo_i8(i8 %a, i8 %b) {
; GFX7-LABEL: v_usubo_i8:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_and_b32_e32 v0, 0xff, v0
; GFX7-NEXT:    v_and_b32_e32 v1, 0xff, v1
; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
; GFX7-NEXT:    v_and_b32_e32 v1, 0xff, v0
; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, v0, v1
; GFX7-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_usubo_i8:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_and_b32_e32 v0, 0xff, v0
; GFX8-NEXT:    v_and_b32_e32 v1, 0xff, v1
; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT:    v_and_b32_e32 v1, 0xff, v0
; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, v0, v1
; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX8-NEXT:    v_sub_u16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_usubo_i8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_sub_u32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-NEXT:    v_cmp_ne_u32_sdwa s[4:5], v0, v0 src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[4:5]
; GFX9-NEXT:    v_sub_u16_e32 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %usubo = call {i8, i1} @llvm.usub.with.overflow.i8(i8 %a, i8 %b)
  %sub = extractvalue {i8, i1} %usubo, 0
  %of = extractvalue {i8, i1} %usubo, 1
  %of.zext = zext i1 %of to i8
  %ret = sub i8 %sub, %of.zext
  ret i8 %ret
}

define i7 @v_usubo_i7(i7 %a, i7 %b) {
; GFX7-LABEL: v_usubo_i7:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_and_b32_e32 v0, 0x7f, v0
; GFX7-NEXT:    v_and_b32_e32 v1, 0x7f, v1
; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
; GFX7-NEXT:    v_and_b32_e32 v1, 0x7f, v0
; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, v0, v1
; GFX7-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_usubo_i7:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_and_b32_e32 v0, 0x7f, v0
; GFX8-NEXT:    v_and_b32_e32 v1, 0x7f, v1
; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT:    v_and_b32_e32 v1, 0x7f, v0
; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, v0, v1
; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX8-NEXT:    v_sub_u16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_usubo_i7:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_and_b32_e32 v0, 0x7f, v0
; GFX9-NEXT:    v_and_b32_e32 v1, 0x7f, v1
; GFX9-NEXT:    v_sub_u32_e32 v0, v0, v1
; GFX9-NEXT:    v_and_b32_e32 v1, 0x7f, v0
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, v0, v1
; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX9-NEXT:    v_sub_u16_e32 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %usubo = call {i7, i1} @llvm.usub.with.overflow.i7(i7 %a, i7 %b)
  %sub = extractvalue {i7, i1} %usubo, 0
  %of = extractvalue {i7, i1} %usubo, 1
  %of.zext = zext i1 %of to i7
  %ret = sub i7 %sub, %of.zext
  ret i7 %ret
}

define <2 x i32> @v_usubo_v2i32(<2 x i32> %a, <2 x i32> %b) {
; GFX7-LABEL: v_usubo_v2i32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
; GFX7-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX7-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
; GFX7-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
; GFX7-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_usubo_v2i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, v1, v3
; GFX8-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v0, v2
; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, v1, v3
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_usubo_v2i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v2
; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX9-NEXT:    v_sub_co_u32_e32 v1, vcc, v1, v3
; GFX9-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
; GFX9-NEXT:    v_sub_u32_e32 v0, v0, v2
; GFX9-NEXT:    v_sub_u32_e32 v1, v1, v3
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %usubo = call {<2 x i32>, <2 x i1>} @llvm.usub.with.overflow.v2i32(<2 x i32> %a, <2 x i32> %b)
  %sub = extractvalue {<2 x i32>, <2 x i1>} %usubo, 0
  %of = extractvalue {<2 x i32>, <2 x i1>} %usubo, 1
  %of.zext = zext <2 x i1> %of to <2 x i32>
  %ret = sub <2 x i32> %sub, %of.zext
  ret <2 x i32> %ret
}

define i32 @v_ssubo_i32(i32 %a, i32 %b) {
; GFX7-LABEL: v_ssubo_i32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
; GFX7-NEXT:    v_cmp_lt_i32_e32 vcc, v2, v0
; GFX7-NEXT:    v_cmp_lt_i32_e64 s[4:5], 0, v1
; GFX7-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v2, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_ssubo_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v0, v1
; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, v2, v0
; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], 0, v1
; GFX8-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v2, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_ssubo_i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_sub_u32_e32 v2, v0, v1
; GFX9-NEXT:    v_cmp_lt_i32_e32 vcc, v2, v0
; GFX9-NEXT:    v_cmp_lt_i32_e64 s[4:5], 0, v1
; GFX9-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX9-NEXT:    v_sub_u32_e32 v0, v2, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %ssubo = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
  %sub = extractvalue {i32, i1} %ssubo, 0
  %of = extractvalue {i32, i1} %ssubo, 1
  %of.zext = zext i1 %of to i32
  %ret = sub i32 %sub, %of.zext
  ret i32 %ret
}

define i64 @v_ssubo_i64(i64 %a, i64 %b) {
; GFX7-LABEL: v_ssubo_i64:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
; GFX7-NEXT:    v_subb_u32_e32 v5, vcc, v1, v3, vcc
; GFX7-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
; GFX7-NEXT:    v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
; GFX7-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v4, v0
; GFX7-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v5, vcc
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_ssubo_i64:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, v0, v2
; GFX8-NEXT:    v_subb_u32_e32 v5, vcc, v1, v3, vcc
; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
; GFX8-NEXT:    v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
; GFX8-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v4, v0
; GFX8-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v5, vcc
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_ssubo_i64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_sub_co_u32_e32 v4, vcc, v0, v2
; GFX9-NEXT:    v_subb_co_u32_e32 v5, vcc, v1, v3, vcc
; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
; GFX9-NEXT:    v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
; GFX9-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v4, v0
; GFX9-NEXT:    v_subbrev_co_u32_e32 v1, vcc, 0, v5, vcc
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %ssubo = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b)
  %sub = extractvalue {i64, i1} %ssubo, 0
  %of = extractvalue {i64, i1} %ssubo, 1
  %of.zext = zext i1 %of to i64
  %ret = sub i64 %sub, %of.zext
  ret i64 %ret
}

define <2 x i32> @v_ssubo_v2i32(<2 x i32> %a, <2 x i32> %b) {
; GFX7-LABEL: v_ssubo_v2i32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_sub_i32_e32 v4, vcc, v0, v2
; GFX7-NEXT:    v_sub_i32_e32 v5, vcc, v1, v3
; GFX7-NEXT:    v_cmp_lt_i32_e32 vcc, v4, v0
; GFX7-NEXT:    v_cmp_lt_i32_e64 s[4:5], v5, v1
; GFX7-NEXT:    v_cmp_lt_i32_e64 s[6:7], 0, v2
; GFX7-NEXT:    v_cmp_lt_i32_e64 s[8:9], 0, v3
; GFX7-NEXT:    s_xor_b64 s[6:7], s[6:7], vcc
; GFX7-NEXT:    s_xor_b64 s[4:5], s[8:9], s[4:5]
; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[6:7]
; GFX7-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[4:5]
; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v4, v0
; GFX7-NEXT:    v_sub_i32_e32 v1, vcc, v5, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_ssubo_v2i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_sub_u32_e32 v4, vcc, v0, v2
; GFX8-NEXT:    v_sub_u32_e32 v5, vcc, v1, v3
; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, v4, v0
; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v5, v1
; GFX8-NEXT:    v_cmp_lt_i32_e64 s[6:7], 0, v2
; GFX8-NEXT:    v_cmp_lt_i32_e64 s[8:9], 0, v3
; GFX8-NEXT:    s_xor_b64 s[6:7], s[6:7], vcc
; GFX8-NEXT:    s_xor_b64 s[4:5], s[8:9], s[4:5]
; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[6:7]
; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[4:5]
; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v4, v0
; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, v5, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_ssubo_v2i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_sub_u32_e32 v4, v0, v2
; GFX9-NEXT:    v_sub_u32_e32 v5, v1, v3
; GFX9-NEXT:    v_cmp_lt_i32_e32 vcc, v4, v0
; GFX9-NEXT:    v_cmp_lt_i32_e64 s[4:5], v5, v1
; GFX9-NEXT:    v_cmp_lt_i32_e64 s[6:7], 0, v2
; GFX9-NEXT:    v_cmp_lt_i32_e64 s[8:9], 0, v3
; GFX9-NEXT:    s_xor_b64 s[6:7], s[6:7], vcc
; GFX9-NEXT:    s_xor_b64 s[4:5], s[8:9], s[4:5]
; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[6:7]
; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[4:5]
; GFX9-NEXT:    v_sub_u32_e32 v0, v4, v0
; GFX9-NEXT:    v_sub_u32_e32 v1, v5, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %ssubo = call {<2 x i32>, <2 x i1>} @llvm.ssub.with.overflow.v2i32(<2 x i32> %a, <2 x i32> %b)
  %sub = extractvalue {<2 x i32>, <2 x i1>} %ssubo, 0
  %of = extractvalue {<2 x i32>, <2 x i1>} %ssubo, 1
  %of.zext = zext <2 x i1> %of to <2 x i32>
  %ret = sub <2 x i32> %sub, %of.zext
  ret <2 x i32> %ret
}

define i8 @v_ssubo_i8(i8 %a, i8 %b) {
; GFX7-LABEL: v_ssubo_i8:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
; GFX7-NEXT:    v_bfe_i32 v3, v2, 0, 8
; GFX7-NEXT:    v_bfe_i32 v0, v0, 0, 8
; GFX7-NEXT:    v_cmp_lt_i32_e32 vcc, v3, v0
; GFX7-NEXT:    v_bfe_i32 v0, v1, 0, 8
; GFX7-NEXT:    v_cmp_lt_i32_e64 s[4:5], 0, v0
; GFX7-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v2, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_ssubo_i8:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_sub_u16_e32 v2, v0, v1
; GFX8-NEXT:    v_bfe_i32 v3, v2, 0, 8
; GFX8-NEXT:    v_bfe_i32 v0, v0, 0, 8
; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, v3, v0
; GFX8-NEXT:    v_bfe_i32 v0, v1, 0, 8
; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], 0, v0
; GFX8-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX8-NEXT:    v_sub_u16_e32 v0, v2, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_ssubo_i8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_sub_u16_e32 v2, v0, v1
; GFX9-NEXT:    v_cmp_lt_i32_sdwa s[4:5], sext(v2), sext(v0) src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    v_cmp_gt_i32_sdwa s[6:7], sext(v1), v0 src0_sel:BYTE_0 src1_sel:DWORD
; GFX9-NEXT:    s_xor_b64 s[4:5], s[6:7], s[4:5]
; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX9-NEXT:    v_sub_u16_e32 v0, v2, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %ssubo = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 %a, i8 %b)
  %sub = extractvalue {i8, i1} %ssubo, 0
  %of = extractvalue {i8, i1} %ssubo, 1
  %of.zext = zext i1 %of to i8
  %ret = sub i8 %sub, %of.zext
  ret i8 %ret
}

define i7 @v_ssubo_i7(i7 %a, i7 %b) {
; GFX7-LABEL: v_ssubo_i7:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
; GFX7-NEXT:    v_bfe_i32 v3, v2, 0, 7
; GFX7-NEXT:    v_bfe_i32 v0, v0, 0, 7
; GFX7-NEXT:    v_cmp_lt_i32_e32 vcc, v3, v0
; GFX7-NEXT:    v_bfe_i32 v0, v1, 0, 7
; GFX7-NEXT:    v_cmp_lt_i32_e64 s[4:5], 0, v0
; GFX7-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v2, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_ssubo_i7:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_sub_u16_e32 v2, v0, v1
; GFX8-NEXT:    v_bfe_i32 v3, v2, 0, 7
; GFX8-NEXT:    v_bfe_i32 v0, v0, 0, 7
; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, v3, v0
; GFX8-NEXT:    v_bfe_i32 v0, v1, 0, 7
; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], 0, v0
; GFX8-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX8-NEXT:    v_sub_u16_e32 v0, v2, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_ssubo_i7:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_sub_u16_e32 v2, v0, v1
; GFX9-NEXT:    v_bfe_i32 v3, v2, 0, 7
; GFX9-NEXT:    v_bfe_i32 v0, v0, 0, 7
; GFX9-NEXT:    v_cmp_lt_i32_e32 vcc, v3, v0
; GFX9-NEXT:    v_bfe_i32 v0, v1, 0, 7
; GFX9-NEXT:    v_cmp_lt_i32_e64 s[4:5], 0, v0
; GFX9-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX9-NEXT:    v_sub_u16_e32 v0, v2, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %ssubo = call {i7, i1} @llvm.ssub.with.overflow.i7(i7 %a, i7 %b)
  %sub = extractvalue {i7, i1} %ssubo, 0
  %of = extractvalue {i7, i1} %ssubo, 1
  %of.zext = zext i1 %of to i7
  %ret = sub i7 %sub, %of.zext
  ret i7 %ret
}

define amdgpu_ps i32 @s_usubo_i32(i32 inreg %a, i32 inreg %b) {
; GFX7-LABEL: s_usubo_i32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_sub_u32 s0, s0, s1
; GFX7-NEXT:    s_cselect_b32 s1, 1, 0
; GFX7-NEXT:    s_sub_i32 s0, s0, s1
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_usubo_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_sub_u32 s0, s0, s1
; GFX8-NEXT:    s_cselect_b32 s1, 1, 0
; GFX8-NEXT:    s_sub_i32 s0, s0, s1
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: s_usubo_i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_sub_u32 s0, s0, s1
; GFX9-NEXT:    s_cselect_b32 s1, 1, 0
; GFX9-NEXT:    s_sub_i32 s0, s0, s1
; GFX9-NEXT:    ; return to shader part epilog
  %usubo = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %sub = extractvalue {i32, i1} %usubo, 0
  %of = extractvalue {i32, i1} %usubo, 1
  %of.zext = zext i1 %of to i32
  %ret = sub i32 %sub, %of.zext
  ret i32 %ret
}

define amdgpu_ps i64 @s_usubo_i64(i64 inreg %a, i64 inreg %b) {
; GFX7-LABEL: s_usubo_i64:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_sub_u32 s0, s0, s2
; GFX7-NEXT:    s_subb_u32 s1, s1, s3
; GFX7-NEXT:    s_cselect_b32 s2, 1, 0
; GFX7-NEXT:    s_sub_u32 s0, s0, s2
; GFX7-NEXT:    s_subb_u32 s1, s1, 0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_usubo_i64:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_sub_u32 s0, s0, s2
; GFX8-NEXT:    s_subb_u32 s1, s1, s3
; GFX8-NEXT:    s_cselect_b32 s2, 1, 0
; GFX8-NEXT:    s_sub_u32 s0, s0, s2
; GFX8-NEXT:    s_subb_u32 s1, s1, 0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: s_usubo_i64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_sub_u32 s0, s0, s2
; GFX9-NEXT:    s_subb_u32 s1, s1, s3
; GFX9-NEXT:    s_cselect_b32 s2, 1, 0
; GFX9-NEXT:    s_sub_u32 s0, s0, s2
; GFX9-NEXT:    s_subb_u32 s1, s1, 0
; GFX9-NEXT:    ; return to shader part epilog
  %usubo = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
  %sub = extractvalue {i64, i1} %usubo, 0
  %of = extractvalue {i64, i1} %usubo, 1
  %of.zext = zext i1 %of to i64
  %ret = sub i64 %sub, %of.zext
  ret i64 %ret
}

define amdgpu_ps <2 x i32> @s_usubo_v2i32(<2 x i32> inreg %a, <2 x i32> inreg %b) {
; GFX7-LABEL: s_usubo_v2i32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_sub_u32 s0, s0, s2
; GFX7-NEXT:    s_cselect_b32 s2, 1, 0
; GFX7-NEXT:    s_sub_u32 s1, s1, s3
; GFX7-NEXT:    s_cselect_b32 s3, 1, 0
; GFX7-NEXT:    s_sub_i32 s0, s0, s2
; GFX7-NEXT:    s_sub_i32 s1, s1, s3
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_usubo_v2i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_sub_u32 s0, s0, s2
; GFX8-NEXT:    s_cselect_b32 s2, 1, 0
; GFX8-NEXT:    s_sub_u32 s1, s1, s3
; GFX8-NEXT:    s_cselect_b32 s3, 1, 0
; GFX8-NEXT:    s_sub_i32 s0, s0, s2
; GFX8-NEXT:    s_sub_i32 s1, s1, s3
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: s_usubo_v2i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_sub_u32 s0, s0, s2
; GFX9-NEXT:    s_cselect_b32 s2, 1, 0
; GFX9-NEXT:    s_sub_u32 s1, s1, s3
; GFX9-NEXT:    s_cselect_b32 s3, 1, 0
; GFX9-NEXT:    s_sub_i32 s0, s0, s2
; GFX9-NEXT:    s_sub_i32 s1, s1, s3
; GFX9-NEXT:    ; return to shader part epilog
  %usubo = call {<2 x i32>, <2 x i1>} @llvm.usub.with.overflow.v2i32(<2 x i32> %a, <2 x i32> %b)
  %sub = extractvalue {<2 x i32>, <2 x i1>} %usubo, 0
  %of = extractvalue {<2 x i32>, <2 x i1>} %usubo, 1
  %of.zext = zext <2 x i1> %of to <2 x i32>
  %ret = sub <2 x i32> %sub, %of.zext
  ret <2 x i32> %ret
}

define i8 @s_usubo_i8(i8 %a, i8 %b) {
; GFX7-LABEL: s_usubo_i8:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_and_b32_e32 v0, 0xff, v0
; GFX7-NEXT:    v_and_b32_e32 v1, 0xff, v1
; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
; GFX7-NEXT:    v_and_b32_e32 v1, 0xff, v0
; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, v0, v1
; GFX7-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: s_usubo_i8:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_and_b32_e32 v0, 0xff, v0
; GFX8-NEXT:    v_and_b32_e32 v1, 0xff, v1
; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT:    v_and_b32_e32 v1, 0xff, v0
; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, v0, v1
; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX8-NEXT:    v_sub_u16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_usubo_i8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_sub_u32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-NEXT:    v_cmp_ne_u32_sdwa s[4:5], v0, v0 src0_sel:DWORD src1_sel:BYTE_0
; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[4:5]
; GFX9-NEXT:    v_sub_u16_e32 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %usubo = call {i8, i1} @llvm.usub.with.overflow.i8(i8 %a, i8 %b)
  %sub = extractvalue {i8, i1} %usubo, 0
  %of = extractvalue {i8, i1} %usubo, 1
  %of.zext = zext i1 %of to i8
  %ret = sub i8 %sub, %of.zext
  ret i8 %ret
}

define i7 @s_usubo_i7(i7 %a, i7 %b) {
; GFX7-LABEL: s_usubo_i7:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_and_b32_e32 v0, 0x7f, v0
; GFX7-NEXT:    v_and_b32_e32 v1, 0x7f, v1
; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
; GFX7-NEXT:    v_and_b32_e32 v1, 0x7f, v0
; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, v0, v1
; GFX7-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: s_usubo_i7:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_and_b32_e32 v0, 0x7f, v0
; GFX8-NEXT:    v_and_b32_e32 v1, 0x7f, v1
; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT:    v_and_b32_e32 v1, 0x7f, v0
; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, v0, v1
; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX8-NEXT:    v_sub_u16_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_usubo_i7:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_and_b32_e32 v0, 0x7f, v0
; GFX9-NEXT:    v_and_b32_e32 v1, 0x7f, v1
; GFX9-NEXT:    v_sub_u32_e32 v0, v0, v1
; GFX9-NEXT:    v_and_b32_e32 v1, 0x7f, v0
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, v0, v1
; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX9-NEXT:    v_sub_u16_e32 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %usubo = call {i7, i1} @llvm.usub.with.overflow.i7(i7 %a, i7 %b)
  %sub = extractvalue {i7, i1} %usubo, 0
  %of = extractvalue {i7, i1} %usubo, 1
  %of.zext = zext i1 %of to i7
  %ret = sub i7 %sub, %of.zext
  ret i7 %ret
}

define amdgpu_ps i32 @s_ssubo_i32(i32 inreg %a, i32 inreg %b) {
; GFX7-LABEL: s_ssubo_i32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_sub_i32 s2, s0, s1
; GFX7-NEXT:    s_cmp_lt_i32 s2, s0
; GFX7-NEXT:    s_cselect_b32 s0, 1, 0
; GFX7-NEXT:    s_cmp_gt_i32 s1, 0
; GFX7-NEXT:    s_cselect_b32 s1, 1, 0
; GFX7-NEXT:    s_xor_b32 s0, s1, s0
; GFX7-NEXT:    s_and_b32 s0, s0, 1
; GFX7-NEXT:    s_sub_i32 s0, s2, s0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_ssubo_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_sub_i32 s2, s0, s1
; GFX8-NEXT:    s_cmp_lt_i32 s2, s0
; GFX8-NEXT:    s_cselect_b32 s0, 1, 0
; GFX8-NEXT:    s_cmp_gt_i32 s1, 0
; GFX8-NEXT:    s_cselect_b32 s1, 1, 0
; GFX8-NEXT:    s_xor_b32 s0, s1, s0
; GFX8-NEXT:    s_and_b32 s0, s0, 1
; GFX8-NEXT:    s_sub_i32 s0, s2, s0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: s_ssubo_i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_sub_i32 s2, s0, s1
; GFX9-NEXT:    s_cmp_lt_i32 s2, s0
; GFX9-NEXT:    s_cselect_b32 s0, 1, 0
; GFX9-NEXT:    s_cmp_gt_i32 s1, 0
; GFX9-NEXT:    s_cselect_b32 s1, 1, 0
; GFX9-NEXT:    s_xor_b32 s0, s1, s0
; GFX9-NEXT:    s_and_b32 s0, s0, 1
; GFX9-NEXT:    s_sub_i32 s0, s2, s0
; GFX9-NEXT:    ; return to shader part epilog
  %ssubo = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
  %sub = extractvalue {i32, i1} %ssubo, 0
  %of = extractvalue {i32, i1} %ssubo, 1
  %of.zext = zext i1 %of to i32
  %ret = sub i32 %sub, %of.zext
  ret i32 %ret
}

define amdgpu_ps i64 @s_ssubo_i64(i64 inreg %a, i64 inreg %b) {
; GFX7-LABEL: s_ssubo_i64:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_sub_u32 s4, s0, s2
; GFX7-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-NEXT:    s_subb_u32 s5, s1, s3
; GFX7-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[0:1]
; GFX7-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[2:3], 0
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    s_xor_b64 s[0:1], s[0:1], vcc
; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, s4, v0
; GFX7-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NEXT:    v_readfirstlane_b32 s1, v1
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_ssubo_i64:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_sub_u32 s4, s0, s2
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    s_subb_u32 s5, s1, s3
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[0:1]
; GFX8-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[2:3], 0
; GFX8-NEXT:    v_mov_b32_e32 v1, s5
; GFX8-NEXT:    s_xor_b64 s[0:1], s[0:1], vcc
; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, s4, v0
; GFX8-NEXT:    v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
; GFX8-NEXT:    v_readfirstlane_b32 s1, v1
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: s_ssubo_i64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_sub_u32 s4, s0, s2
; GFX9-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-NEXT:    s_subb_u32 s5, s1, s3
; GFX9-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[0:1]
; GFX9-NEXT:    v_cmp_gt_i64_e64 s[0:1], s[2:3], 0
; GFX9-NEXT:    v_mov_b32_e32 v1, s5
; GFX9-NEXT:    s_xor_b64 s[0:1], s[0:1], vcc
; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, s4, v0
; GFX9-NEXT:    v_subbrev_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-NEXT:    v_readfirstlane_b32 s1, v1
; GFX9-NEXT:    ; return to shader part epilog
  %ssubo = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b)
  %sub = extractvalue {i64, i1} %ssubo, 0
  %of = extractvalue {i64, i1} %ssubo, 1
  %of.zext = zext i1 %of to i64
  %ret = sub i64 %sub, %of.zext
  ret i64 %ret
}

define amdgpu_ps <2 x i32> @s_ssubo_v2i32(<2 x i32> inreg %a, <2 x i32> inreg %b) {
; GFX7-LABEL: s_ssubo_v2i32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_sub_i32 s4, s0, s2
; GFX7-NEXT:    s_sub_i32 s5, s1, s3
; GFX7-NEXT:    s_cmp_lt_i32 s4, s0
; GFX7-NEXT:    s_cselect_b32 s0, 1, 0
; GFX7-NEXT:    s_cmp_lt_i32 s5, s1
; GFX7-NEXT:    s_cselect_b32 s1, 1, 0
; GFX7-NEXT:    s_cmp_gt_i32 s2, 0
; GFX7-NEXT:    s_cselect_b32 s2, 1, 0
; GFX7-NEXT:    s_cmp_gt_i32 s3, 0
; GFX7-NEXT:    s_cselect_b32 s3, 1, 0
; GFX7-NEXT:    s_xor_b32 s0, s2, s0
; GFX7-NEXT:    s_xor_b32 s1, s3, s1
; GFX7-NEXT:    s_and_b32 s0, s0, 1
; GFX7-NEXT:    s_and_b32 s1, s1, 1
; GFX7-NEXT:    s_sub_i32 s0, s4, s0
; GFX7-NEXT:    s_sub_i32 s1, s5, s1
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_ssubo_v2i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_sub_i32 s4, s0, s2
; GFX8-NEXT:    s_sub_i32 s5, s1, s3
; GFX8-NEXT:    s_cmp_lt_i32 s4, s0
; GFX8-NEXT:    s_cselect_b32 s0, 1, 0
; GFX8-NEXT:    s_cmp_lt_i32 s5, s1
; GFX8-NEXT:    s_cselect_b32 s1, 1, 0
; GFX8-NEXT:    s_cmp_gt_i32 s2, 0
; GFX8-NEXT:    s_cselect_b32 s2, 1, 0
; GFX8-NEXT:    s_cmp_gt_i32 s3, 0
; GFX8-NEXT:    s_cselect_b32 s3, 1, 0
; GFX8-NEXT:    s_xor_b32 s0, s2, s0
; GFX8-NEXT:    s_xor_b32 s1, s3, s1
; GFX8-NEXT:    s_and_b32 s0, s0, 1
; GFX8-NEXT:    s_and_b32 s1, s1, 1
; GFX8-NEXT:    s_sub_i32 s0, s4, s0
; GFX8-NEXT:    s_sub_i32 s1, s5, s1
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: s_ssubo_v2i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_sub_i32 s4, s0, s2
; GFX9-NEXT:    s_sub_i32 s5, s1, s3
; GFX9-NEXT:    s_cmp_lt_i32 s4, s0
; GFX9-NEXT:    s_cselect_b32 s0, 1, 0
; GFX9-NEXT:    s_cmp_lt_i32 s5, s1
; GFX9-NEXT:    s_cselect_b32 s1, 1, 0
; GFX9-NEXT:    s_cmp_gt_i32 s2, 0
; GFX9-NEXT:    s_cselect_b32 s2, 1, 0
; GFX9-NEXT:    s_cmp_gt_i32 s3, 0
; GFX9-NEXT:    s_cselect_b32 s3, 1, 0
; GFX9-NEXT:    s_xor_b32 s0, s2, s0
; GFX9-NEXT:    s_xor_b32 s1, s3, s1
; GFX9-NEXT:    s_and_b32 s0, s0, 1
; GFX9-NEXT:    s_and_b32 s1, s1, 1
; GFX9-NEXT:    s_sub_i32 s0, s4, s0
; GFX9-NEXT:    s_sub_i32 s1, s5, s1
; GFX9-NEXT:    ; return to shader part epilog
  %ssubo = call {<2 x i32>, <2 x i1>} @llvm.ssub.with.overflow.v2i32(<2 x i32> %a, <2 x i32> %b)
  %sub = extractvalue {<2 x i32>, <2 x i1>} %ssubo, 0
  %of = extractvalue {<2 x i32>, <2 x i1>} %ssubo, 1
  %of.zext = zext <2 x i1> %of to <2 x i32>
  %ret = sub <2 x i32> %sub, %of.zext
  ret <2 x i32> %ret
}

define i8 @s_ssubo_i8(i8 %a, i8 %b) {
; GFX7-LABEL: s_ssubo_i8:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
; GFX7-NEXT:    v_bfe_i32 v3, v2, 0, 8
; GFX7-NEXT:    v_bfe_i32 v0, v0, 0, 8
; GFX7-NEXT:    v_cmp_lt_i32_e32 vcc, v3, v0
; GFX7-NEXT:    v_bfe_i32 v0, v1, 0, 8
; GFX7-NEXT:    v_cmp_lt_i32_e64 s[4:5], 0, v0
; GFX7-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v2, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: s_ssubo_i8:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_sub_u16_e32 v2, v0, v1
; GFX8-NEXT:    v_bfe_i32 v3, v2, 0, 8
; GFX8-NEXT:    v_bfe_i32 v0, v0, 0, 8
; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, v3, v0
; GFX8-NEXT:    v_bfe_i32 v0, v1, 0, 8
; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], 0, v0
; GFX8-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX8-NEXT:    v_sub_u16_e32 v0, v2, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_ssubo_i8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_sub_u16_e32 v2, v0, v1
; GFX9-NEXT:    v_cmp_lt_i32_sdwa s[4:5], sext(v2), sext(v0) src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    v_cmp_gt_i32_sdwa s[6:7], sext(v1), v0 src0_sel:BYTE_0 src1_sel:DWORD
; GFX9-NEXT:    s_xor_b64 s[4:5], s[6:7], s[4:5]
; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX9-NEXT:    v_sub_u16_e32 v0, v2, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %ssubo = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 %a, i8 %b)
  %sub = extractvalue {i8, i1} %ssubo, 0
  %of = extractvalue {i8, i1} %ssubo, 1
  %of.zext = zext i1 %of to i8
  %ret = sub i8 %sub, %of.zext
  ret i8 %ret
}

define i7 @s_ssubo_i7(i7 %a, i7 %b) {
; GFX7-LABEL: s_ssubo_i7:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_sub_i32_e32 v2, vcc, v0, v1
; GFX7-NEXT:    v_bfe_i32 v3, v2, 0, 7
; GFX7-NEXT:    v_bfe_i32 v0, v0, 0, 7
; GFX7-NEXT:    v_cmp_lt_i32_e32 vcc, v3, v0
; GFX7-NEXT:    v_bfe_i32 v0, v1, 0, 7
; GFX7-NEXT:    v_cmp_lt_i32_e64 s[4:5], 0, v0
; GFX7-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v2, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: s_ssubo_i7:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_sub_u16_e32 v2, v0, v1
; GFX8-NEXT:    v_bfe_i32 v3, v2, 0, 7
; GFX8-NEXT:    v_bfe_i32 v0, v0, 0, 7
; GFX8-NEXT:    v_cmp_lt_i32_e32 vcc, v3, v0
; GFX8-NEXT:    v_bfe_i32 v0, v1, 0, 7
; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], 0, v0
; GFX8-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX8-NEXT:    v_sub_u16_e32 v0, v2, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_ssubo_i7:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_sub_u16_e32 v2, v0, v1
; GFX9-NEXT:    v_bfe_i32 v3, v2, 0, 7
; GFX9-NEXT:    v_bfe_i32 v0, v0, 0, 7
; GFX9-NEXT:    v_cmp_lt_i32_e32 vcc, v3, v0
; GFX9-NEXT:    v_bfe_i32 v0, v1, 0, 7
; GFX9-NEXT:    v_cmp_lt_i32_e64 s[4:5], 0, v0
; GFX9-NEXT:    s_xor_b64 s[4:5], s[4:5], vcc
; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX9-NEXT:    v_sub_u16_e32 v0, v2, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %ssubo = call {i7, i1} @llvm.ssub.with.overflow.i7(i7 %a, i7 %b)
  %sub = extractvalue {i7, i1} %ssubo, 0
  %of = extractvalue {i7, i1} %ssubo, 1
  %of.zext = zext i1 %of to i7
  %ret = sub i7 %sub, %of.zext
  ret i7 %ret
}

define amdgpu_ps i32 @usubo_i32_sv(i32 inreg %a, i32 %b) {
; GFX7-LABEL: usubo_i32_sv:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, s0, v0
; GFX7-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: usubo_i32_sv:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, s0, v0
; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v0, v1
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: usubo_i32_sv:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, s0, v0
; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX9-NEXT:    v_sub_u32_e32 v0, v0, v1
; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-NEXT:    ; return to shader part epilog
  %usubo = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %sub = extractvalue {i32, i1} %usubo, 0
  %of = extractvalue {i32, i1} %usubo, 1
  %of.zext = zext i1 %of to i32
  %ret = sub i32 %sub, %of.zext
  ret i32 %ret
}

define amdgpu_ps i16 @usubo_i16_sv(i16 inreg %a, i16 %b) {
; GFX7-LABEL: usubo_i16_sv:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, s0, v0
; GFX7-NEXT:    v_and_b32_e32 v1, 0xffff, v0
; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, v0, v1
; GFX7-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: usubo_i16_sv:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX8-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, s0, v0
; GFX8-NEXT:    v_and_b32_e32 v1, 0xffff, v0
; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, v0, v1
; GFX8-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX8-NEXT:    v_sub_u16_e32 v0, v0, v1
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: usubo_i16_sv:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX9-NEXT:    v_sub_u32_sdwa v0, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX9-NEXT:    v_cmp_ne_u32_sdwa s[0:1], v0, v0 src0_sel:DWORD src1_sel:WORD_0
; GFX9-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s[0:1]
; GFX9-NEXT:    v_sub_u16_e32 v0, v0, v1
; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-NEXT:    ; return to shader part epilog
  %usubo = call {i16, i1} @llvm.usub.with.overflow.i16(i16 %a, i16 %b)
  %sub = extractvalue {i16, i1} %usubo, 0
  %of = extractvalue {i16, i1} %usubo, 1
  %of.zext = zext i1 %of to i16
  %ret = sub i16 %sub, %of.zext
  ret i16 %ret
}

define amdgpu_ps i32 @ssubo_i32_sv(i32 inreg %a, i32 %b) {
; GFX7-LABEL: ssubo_i32_sv:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_sub_i32_e32 v1, vcc, s0, v0
; GFX7-NEXT:    v_cmp_gt_i32_e32 vcc, s0, v1
; GFX7-NEXT:    v_cmp_lt_i32_e64 s[0:1], 0, v0
; GFX7-NEXT:    s_xor_b64 s[0:1], s[0:1], vcc
; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v1, v0
; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: ssubo_i32_sv:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_sub_u32_e32 v1, vcc, s0, v0
; GFX8-NEXT:    v_cmp_gt_i32_e32 vcc, s0, v1
; GFX8-NEXT:    v_cmp_lt_i32_e64 s[0:1], 0, v0
; GFX8-NEXT:    s_xor_b64 s[0:1], s[0:1], vcc
; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v1, v0
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: ssubo_i32_sv:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_sub_u32_e32 v1, s0, v0
; GFX9-NEXT:    v_cmp_gt_i32_e32 vcc, s0, v1
; GFX9-NEXT:    v_cmp_lt_i32_e64 s[0:1], 0, v0
; GFX9-NEXT:    s_xor_b64 s[0:1], s[0:1], vcc
; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9-NEXT:    v_sub_u32_e32 v0, v1, v0
; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-NEXT:    ; return to shader part epilog
  %ssubo = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
  %sub = extractvalue {i32, i1} %ssubo, 0
  %of = extractvalue {i32, i1} %ssubo, 1
  %of.zext = zext i1 %of to i32
  %ret = sub i32 %sub, %of.zext
  ret i32 %ret
}

define amdgpu_ps i16 @ssubo_i16_sv(i16 inreg %a, i16 %b) {
; GFX7-LABEL: ssubo_i16_sv:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_sub_i32_e32 v1, vcc, s0, v0
; GFX7-NEXT:    v_bfe_i32 v2, v1, 0, 16
; GFX7-NEXT:    s_sext_i32_i16 s0, s0
; GFX7-NEXT:    v_bfe_i32 v0, v0, 0, 16
; GFX7-NEXT:    v_cmp_gt_i32_e32 vcc, s0, v2
; GFX7-NEXT:    v_cmp_lt_i32_e64 s[0:1], 0, v0
; GFX7-NEXT:    s_xor_b64 s[0:1], s[0:1], vcc
; GFX7-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v1, v0
; GFX7-NEXT:    v_readfirstlane_b32 s0, v0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: ssubo_i16_sv:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_sub_u16_e32 v1, s0, v0
; GFX8-NEXT:    v_cmp_gt_i16_e32 vcc, s0, v1
; GFX8-NEXT:    v_cmp_lt_i16_e64 s[0:1], 0, v0
; GFX8-NEXT:    s_xor_b64 s[0:1], s[0:1], vcc
; GFX8-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX8-NEXT:    v_sub_u16_e32 v0, v1, v0
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: ssubo_i16_sv:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_sub_u16_e32 v1, s0, v0
; GFX9-NEXT:    v_cmp_gt_i16_e32 vcc, s0, v1
; GFX9-NEXT:    v_cmp_lt_i16_e64 s[0:1], 0, v0
; GFX9-NEXT:    s_xor_b64 s[0:1], s[0:1], vcc
; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9-NEXT:    v_sub_u16_e32 v0, v1, v0
; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
; GFX9-NEXT:    ; return to shader part epilog
  %ssubo = call {i16, i1} @llvm.ssub.with.overflow.i16(i16 %a, i16 %b)
  %sub = extractvalue {i16, i1} %ssubo, 0
  %of = extractvalue {i16, i1} %ssubo, 1
  %of.zext = zext i1 %of to i16
  %ret = sub i16 %sub, %of.zext
  ret i16 %ret
}

declare {i7, i1} @llvm.usub.with.overflow.i7(i7 %a, i7 %b)
declare {i8, i1} @llvm.usub.with.overflow.i8(i8 %a, i8 %b)
declare {i16, i1} @llvm.usub.with.overflow.i16(i16 %a, i16 %b)
declare {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
declare {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
declare {<2 x i32>, <2 x i1>} @llvm.usub.with.overflow.v2i32(<2 x i32> %a, <2 x i32> %b)

declare {i7, i1} @llvm.ssub.with.overflow.i7(i7 %a, i7 %b)
declare {i8, i1} @llvm.ssub.with.overflow.i8(i8 %a, i8 %b)
declare {i16, i1} @llvm.ssub.with.overflow.i16(i16 %a, i16 %b)
declare {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
declare {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b)
declare {<2 x i32>, <2 x i1>} @llvm.ssub.with.overflow.v2i32(<2 x i32> %a, <2 x i32> %b)