; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
;
; SABD
;

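; The tests below build a signed absolute difference from a sign-extend,
; subtract, abs, truncate sequence. The CHECK lines expect the result to be
; computed at the source element width with vmin.vv/vmax.vv/vsub.vv, with no
; widening of the operands.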
define <8 x i8> @sabd_8b_as_16b(<8 x i8> %a, <8 x i8> %b) {
;
; CHECK-LABEL: sabd_8b_as_16b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmin.vv v10, v8, v9
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    ret
  %a.sext = sext <8 x i8> %a to <8 x i16>
  %b.sext = sext <8 x i8> %b to <8 x i16>
  %sub = sub <8 x i16> %a.sext, %b.sext
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
  %trunc = trunc <8 x i16> %abs to <8 x i8>
  ret <8 x i8> %trunc
}

define <8 x i8> @sabd_8b_as_32b(<8 x i8> %a, <8 x i8> %b) {
;
; CHECK-LABEL: sabd_8b_as_32b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmin.vv v10, v8, v9
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    ret
  %a.sext = sext <8 x i8> %a to <8 x i32>
  %b.sext = sext <8 x i8> %b to <8 x i32>
  %sub = sub <8 x i32> %a.sext, %b.sext
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 true)
  %trunc = trunc <8 x i32> %abs to <8 x i8>
  ret <8 x i8> %trunc
}

define <16 x i8> @sabd_16b(<16 x i8> %a, <16 x i8> %b) {
;
; CHECK-LABEL: sabd_16b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vmin.vv v10, v8, v9
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    ret
  %a.sext = sext <16 x i8> %a to <16 x i16>
  %b.sext = sext <16 x i8> %b to <16 x i16>
  %sub = sub <16 x i16> %a.sext, %b.sext
  %abs = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %sub, i1 true)
  %trunc = trunc <16 x i16> %abs to <16 x i8>
  ret <16 x i8> %trunc
}

define <4 x i16> @sabd_4h(<4 x i16> %a, <4 x i16> %b) {
;
; CHECK-LABEL: sabd_4h:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vmin.vv v10, v8, v9
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    ret
  %a.sext = sext <4 x i16> %a to <4 x i32>
  %b.sext = sext <4 x i16> %b to <4 x i32>
  %sub = sub <4 x i32> %a.sext, %b.sext
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
  %trunc = trunc <4 x i32> %abs to <4 x i16>
  ret <4 x i16> %trunc
}

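; In the *_promoted_ops tests the operands are narrower than the returned
; vector, so the difference is computed at the narrow element width and then
; widened with vzext.vf2. Zero-extension is the correct widening here because
; the absolute difference is non-negative.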
define <4 x i16> @sabd_4h_promoted_ops(<4 x i8> %a, <4 x i8> %b) {
;
; CHECK-LABEL: sabd_4h_promoted_ops:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vmin.vv v10, v8, v9
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v9, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vzext.vf2 v8, v9
; CHECK-NEXT:    ret
  %a.sext = sext <4 x i8> %a to <4 x i16>
  %b.sext = sext <4 x i8> %b to <4 x i16>
  %sub = sub <4 x i16> %a.sext, %b.sext
  %abs = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %sub, i1 true)
  ret <4 x i16> %abs
}

define <8 x i16> @sabd_8h(<8 x i16> %a, <8 x i16> %b) {
;
; CHECK-LABEL: sabd_8h:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmin.vv v10, v8, v9
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    ret
  %a.sext = sext <8 x i16> %a to <8 x i32>
  %b.sext = sext <8 x i16> %b to <8 x i32>
  %sub = sub <8 x i32> %a.sext, %b.sext
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 true)
  %trunc = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %trunc
}

define <8 x i16> @sabd_8h_promoted_ops(<8 x i8> %a, <8 x i8> %b) {
;
; CHECK-LABEL: sabd_8h_promoted_ops:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmin.vv v10, v8, v9
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v9, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vzext.vf2 v8, v9
; CHECK-NEXT:    ret
  %a.sext = sext <8 x i8> %a to <8 x i16>
  %b.sext = sext <8 x i8> %b to <8 x i16>
  %sub = sub <8 x i16> %a.sext, %b.sext
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
  ret <8 x i16> %abs
}

define <2 x i32> @sabd_2s(<2 x i32> %a, <2 x i32> %b) {
;
; CHECK-LABEL: sabd_2s:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vmin.vv v10, v8, v9
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    ret
  %a.sext = sext <2 x i32> %a to <2 x i64>
  %b.sext = sext <2 x i32> %b to <2 x i64>
  %sub = sub <2 x i64> %a.sext, %b.sext
  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
  %trunc = trunc <2 x i64> %abs to <2 x i32>
  ret <2 x i32> %trunc
}

define <2 x i32> @sabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) {
;
; CHECK-LABEL: sabd_2s_promoted_ops:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vmin.vv v10, v8, v9
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v9, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vzext.vf2 v8, v9
; CHECK-NEXT:    ret
  %a.sext = sext <2 x i16> %a to <2 x i32>
  %b.sext = sext <2 x i16> %b to <2 x i32>
  %sub = sub <2 x i32> %a.sext, %b.sext
  %abs = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %sub, i1 true)
  ret <2 x i32> %abs
}

define <4 x i32> @sabd_4s(<4 x i32> %a, <4 x i32> %b) {
;
; CHECK-LABEL: sabd_4s:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmin.vv v10, v8, v9
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    ret
  %a.sext = sext <4 x i32> %a to <4 x i64>
  %b.sext = sext <4 x i32> %b to <4 x i64>
  %sub = sub <4 x i64> %a.sext, %b.sext
  %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %sub, i1 true)
  %trunc = trunc <4 x i64> %abs to <4 x i32>
  ret <4 x i32> %trunc
}

define <4 x i32> @sabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) {
;
; CHECK-LABEL: sabd_4s_promoted_ops:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vmin.vv v10, v8, v9
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v9, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vzext.vf2 v8, v9
; CHECK-NEXT:    ret
  %a.sext = sext <4 x i16> %a to <4 x i32>
  %b.sext = sext <4 x i16> %b to <4 x i32>
  %sub = sub <4 x i32> %a.sext, %b.sext
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
  ret <4 x i32> %abs
}

define <2 x i64> @sabd_2d(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: sabd_2d:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vmin.vv v10, v8, v9
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    ret
  %a.sext = sext <2 x i64> %a to <2 x i128>
  %b.sext = sext <2 x i64> %b to <2 x i128>
  %sub = sub <2 x i128> %a.sext, %b.sext
  %abs = call <2 x i128> @llvm.abs.v2i128(<2 x i128> %sub, i1 true)
  %trunc = trunc <2 x i128> %abs to <2 x i64>
  ret <2 x i64> %trunc
}

define <2 x i64> @sabd_2d_promoted_ops(<2 x i32> %a, <2 x i32> %b) {
;
; CHECK-LABEL: sabd_2d_promoted_ops:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vmin.vv v10, v8, v9
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v9, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT:    vzext.vf2 v8, v9
; CHECK-NEXT:    ret
  %a.sext = sext <2 x i32> %a to <2 x i64>
  %b.sext = sext <2 x i32> %b to <2 x i64>
  %sub = sub <2 x i64> %a.sext, %b.sext
  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
  ret <2 x i64> %abs
}

;
; UABD
;

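; The unsigned variants follow the same pattern, selecting vminu.vv/vmaxu.vv
; in place of the signed min/max instructions.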
define <8 x i8> @uabd_8b(<8 x i8> %a, <8 x i8> %b) {
;
; CHECK-LABEL: uabd_8b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vminu.vv v10, v8, v9
; CHECK-NEXT:    vmaxu.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    ret
  %a.zext = zext <8 x i8> %a to <8 x i16>
  %b.zext = zext <8 x i8> %b to <8 x i16>
  %sub = sub <8 x i16> %a.zext, %b.zext
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
  %trunc = trunc <8 x i16> %abs to <8 x i8>
  ret <8 x i8> %trunc
}

define <16 x i8> @uabd_16b(<16 x i8> %a, <16 x i8> %b) {
;
; CHECK-LABEL: uabd_16b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vminu.vv v10, v8, v9
; CHECK-NEXT:    vmaxu.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    ret
  %a.zext = zext <16 x i8> %a to <16 x i16>
  %b.zext = zext <16 x i8> %b to <16 x i16>
  %sub = sub <16 x i16> %a.zext, %b.zext
  %abs = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %sub, i1 true)
  %trunc = trunc <16 x i16> %abs to <16 x i8>
  ret <16 x i8> %trunc
}

define <4 x i16> @uabd_4h(<4 x i16> %a, <4 x i16> %b) {
;
; CHECK-LABEL: uabd_4h:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vminu.vv v10, v8, v9
; CHECK-NEXT:    vmaxu.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    ret
  %a.zext = zext <4 x i16> %a to <4 x i32>
  %b.zext = zext <4 x i16> %b to <4 x i32>
  %sub = sub <4 x i32> %a.zext, %b.zext
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
  %trunc = trunc <4 x i32> %abs to <4 x i16>
  ret <4 x i16> %trunc
}

define <4 x i16> @uabd_4h_promoted_ops(<4 x i8> %a, <4 x i8> %b) {
;
; CHECK-LABEL: uabd_4h_promoted_ops:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vminu.vv v10, v8, v9
; CHECK-NEXT:    vmaxu.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v9, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT:    vzext.vf2 v8, v9
; CHECK-NEXT:    ret
  %a.zext = zext <4 x i8> %a to <4 x i16>
  %b.zext = zext <4 x i8> %b to <4 x i16>
  %sub = sub <4 x i16> %a.zext, %b.zext
  %abs = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %sub, i1 true)
  ret <4 x i16> %abs
}

define <8 x i16> @uabd_8h(<8 x i16> %a, <8 x i16> %b) {
;
; CHECK-LABEL: uabd_8h:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vminu.vv v10, v8, v9
; CHECK-NEXT:    vmaxu.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    ret
  %a.zext = zext <8 x i16> %a to <8 x i32>
  %b.zext = zext <8 x i16> %b to <8 x i32>
  %sub = sub <8 x i32> %a.zext, %b.zext
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 true)
  %trunc = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %trunc
}

define <8 x i16> @uabd_8h_promoted_ops(<8 x i8> %a, <8 x i8> %b) {
;
; CHECK-LABEL: uabd_8h_promoted_ops:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vminu.vv v10, v8, v9
; CHECK-NEXT:    vmaxu.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v9, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vzext.vf2 v8, v9
; CHECK-NEXT:    ret
  %a.zext = zext <8 x i8> %a to <8 x i16>
  %b.zext = zext <8 x i8> %b to <8 x i16>
  %sub = sub <8 x i16> %a.zext, %b.zext
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
  ret <8 x i16> %abs
}

define <2 x i32> @uabd_2s(<2 x i32> %a, <2 x i32> %b) {
;
; CHECK-LABEL: uabd_2s:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vminu.vv v10, v8, v9
; CHECK-NEXT:    vmaxu.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    ret
  %a.zext = zext <2 x i32> %a to <2 x i64>
  %b.zext = zext <2 x i32> %b to <2 x i64>
  %sub = sub <2 x i64> %a.zext, %b.zext
  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
  %trunc = trunc <2 x i64> %abs to <2 x i32>
  ret <2 x i32> %trunc
}

define <2 x i32> @uabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) {
;
; CHECK-LABEL: uabd_2s_promoted_ops:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vminu.vv v10, v8, v9
; CHECK-NEXT:    vmaxu.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v9, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vzext.vf2 v8, v9
; CHECK-NEXT:    ret
  %a.zext = zext <2 x i16> %a to <2 x i32>
  %b.zext = zext <2 x i16> %b to <2 x i32>
  %sub = sub <2 x i32> %a.zext, %b.zext
  %abs = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %sub, i1 true)
  ret <2 x i32> %abs
}

define <4 x i32> @uabd_4s(<4 x i32> %a, <4 x i32> %b) {
;
; CHECK-LABEL: uabd_4s:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vminu.vv v10, v8, v9
; CHECK-NEXT:    vmaxu.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    ret
  %a.zext = zext <4 x i32> %a to <4 x i64>
  %b.zext = zext <4 x i32> %b to <4 x i64>
  %sub = sub <4 x i64> %a.zext, %b.zext
  %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %sub, i1 true)
  %trunc = trunc <4 x i64> %abs to <4 x i32>
  ret <4 x i32> %trunc
}

define <4 x i32> @uabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) {
;
; CHECK-LABEL: uabd_4s_promoted_ops:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vminu.vv v10, v8, v9
; CHECK-NEXT:    vmaxu.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v9, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT:    vzext.vf2 v8, v9
; CHECK-NEXT:    ret
  %a.zext = zext <4 x i16> %a to <4 x i32>
  %b.zext = zext <4 x i16> %b to <4 x i32>
  %sub = sub <4 x i32> %a.zext, %b.zext
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
  ret <4 x i32> %abs
}

define <2 x i64> @uabd_2d(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: uabd_2d:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vminu.vv v10, v8, v9
; CHECK-NEXT:    vmaxu.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    ret
  %a.zext = zext <2 x i64> %a to <2 x i128>
  %b.zext = zext <2 x i64> %b to <2 x i128>
  %sub = sub <2 x i128> %a.zext, %b.zext
  %abs = call <2 x i128> @llvm.abs.v2i128(<2 x i128> %sub, i1 true)
  %trunc = trunc <2 x i128> %abs to <2 x i64>
  ret <2 x i64> %trunc
}

define <2 x i64> @uabd_2d_promoted_ops(<2 x i32> %a, <2 x i32> %b) {
;
; CHECK-LABEL: uabd_2d_promoted_ops:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vminu.vv v10, v8, v9
; CHECK-NEXT:    vmaxu.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v9, v8, v10
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT:    vzext.vf2 v8, v9
; CHECK-NEXT:    ret
  %a.zext = zext <2 x i32> %a to <2 x i64>
  %b.zext = zext <2 x i32> %b to <2 x i64>
  %sub = sub <2 x i64> %a.zext, %b.zext
  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
  ret <2 x i64> %abs
}

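; For a plain sub carrying the nuw flag, the CHECK lines currently expect the
; generic abs expansion (negate and max) rather than the min/max/sub sequence.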
define <16 x i8> @uabd_v16i8_nuw(<16 x i8> %a, <16 x i8> %b) {
;
; CHECK-LABEL: uabd_v16i8_nuw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vrsub.vi v9, v8, 0
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    ret
  %sub = sub nuw <16 x i8> %a, %b
  %abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %sub, i1 true)
  ret <16 x i8> %abs
}

define <8 x i16> @uabd_v8i16_nuw(<8 x i16> %a, <8 x i16> %b) {
;
; CHECK-LABEL: uabd_v8i16_nuw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vrsub.vi v9, v8, 0
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    ret
  %sub = sub nuw <8 x i16> %a, %b
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
  ret <8 x i16> %abs
}

define <4 x i32> @uabd_v4i32_nuw(<4 x i32> %a, <4 x i32> %b) {
;
; CHECK-LABEL: uabd_v4i32_nuw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vrsub.vi v9, v8, 0
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    ret
  %sub = sub nuw <4 x i32> %a, %b
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
  ret <4 x i32> %abs
}

define <2 x i64> @uabd_v2i64_nuw(<2 x i64> %a, <2 x i64> %b) {
;
; CHECK-LABEL: uabd_v2i64_nuw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vrsub.vi v9, v8, 0
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    ret
  %sub = sub nuw <2 x i64> %a, %b
  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
  ret <2 x i64> %abs
}

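; With the nsw flag, abs(sub(a, b)) is a signed absolute difference, and the
; usual vmin.vv/vmax.vv/vsub.vv sequence is expected.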
define <16 x i8> @sabd_v16i8_nsw(<16 x i8> %a, <16 x i8> %b) {
;
; CHECK-LABEL: sabd_v16i8_nsw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vmin.vv v10, v8, v9
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    ret
  %sub = sub nsw <16 x i8> %a, %b
  %abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %sub, i1 true)
  ret <16 x i8> %abs
}

define <8 x i16> @sabd_v8i16_nsw(<8 x i16> %a, <8 x i16> %b) {
;
; CHECK-LABEL: sabd_v8i16_nsw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmin.vv v10, v8, v9
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    ret
  %sub = sub nsw <8 x i16> %a, %b
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
  ret <8 x i16> %abs
}

define <4 x i32> @sabd_v4i32_nsw(<4 x i32> %a, <4 x i32> %b) {
;
; CHECK-LABEL: sabd_v4i32_nsw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmin.vv v10, v8, v9
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    ret
  %sub = sub nsw <4 x i32> %a, %b
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
  ret <4 x i32> %abs
}

define <2 x i64> @sabd_v2i64_nsw(<2 x i64> %a, <2 x i64> %b) {
;
; CHECK-LABEL: sabd_v2i64_nsw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vmin.vv v10, v8, v9
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    ret
  %sub = sub nsw <2 x i64> %a, %b
  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
  ret <2 x i64> %abs
}

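; max(a, b) - min(a, b) is another spelling of the absolute difference and is
; expected to lower to the same three-instruction sequence.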
define <16 x i8> @smaxmin_v16i8(<16 x i8> %0, <16 x i8> %1) {
;
; CHECK-LABEL: smaxmin_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vmin.vv v10, v8, v9
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    ret
  %a = tail call <16 x i8> @llvm.smax.v16i8(<16 x i8> %0, <16 x i8> %1)
  %b = tail call <16 x i8> @llvm.smin.v16i8(<16 x i8> %0, <16 x i8> %1)
  %sub = sub <16 x i8> %a, %b
  ret <16 x i8> %sub
}

define <8 x i16> @smaxmin_v8i16(<8 x i16> %0, <8 x i16> %1) {
;
; CHECK-LABEL: smaxmin_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmin.vv v10, v8, v9
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    ret
  %a = tail call <8 x i16> @llvm.smax.v8i16(<8 x i16> %0, <8 x i16> %1)
  %b = tail call <8 x i16> @llvm.smin.v8i16(<8 x i16> %0, <8 x i16> %1)
  %sub = sub <8 x i16> %a, %b
  ret <8 x i16> %sub
}

define <4 x i32> @smaxmin_v4i32(<4 x i32> %0, <4 x i32> %1) {
;
; CHECK-LABEL: smaxmin_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmin.vv v10, v8, v9
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    ret
  %a = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %0, <4 x i32> %1)
  %b = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %0, <4 x i32> %1)
  %sub = sub <4 x i32> %a, %b
  ret <4 x i32> %sub
}

define <2 x i64> @smaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) {
;
; CHECK-LABEL: smaxmin_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vmin.vv v10, v8, v9
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    ret
  %a = tail call <2 x i64> @llvm.smax.v2i64(<2 x i64> %0, <2 x i64> %1)
  %b = tail call <2 x i64> @llvm.smin.v2i64(<2 x i64> %0, <2 x i64> %1)
  %sub = sub <2 x i64> %a, %b
  ret <2 x i64> %sub
}

define <16 x i8> @umaxmin_v16i8(<16 x i8> %0, <16 x i8> %1) {
;
; CHECK-LABEL: umaxmin_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vminu.vv v10, v8, v9
; CHECK-NEXT:    vmaxu.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    ret
  %a = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> %0, <16 x i8> %1)
  %b = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %0, <16 x i8> %1)
  %sub = sub <16 x i8> %a, %b
  ret <16 x i8> %sub
}

define <8 x i16> @umaxmin_v8i16(<8 x i16> %0, <8 x i16> %1) {
;
; CHECK-LABEL: umaxmin_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vminu.vv v10, v8, v9
; CHECK-NEXT:    vmaxu.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    ret
  %a = tail call <8 x i16> @llvm.umax.v8i16(<8 x i16> %0, <8 x i16> %1)
  %b = tail call <8 x i16> @llvm.umin.v8i16(<8 x i16> %0, <8 x i16> %1)
  %sub = sub <8 x i16> %a, %b
  ret <8 x i16> %sub
}

define <4 x i32> @umaxmin_v4i32(<4 x i32> %0, <4 x i32> %1) {
;
; CHECK-LABEL: umaxmin_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vminu.vv v10, v8, v9
; CHECK-NEXT:    vmaxu.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    ret
  %a = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> %0, <4 x i32> %1)
  %b = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %0, <4 x i32> %1)
  %sub = sub <4 x i32> %a, %b
  ret <4 x i32> %sub
}

define <2 x i64> @umaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) {
;
; CHECK-LABEL: umaxmin_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vminu.vv v10, v8, v9
; CHECK-NEXT:    vmaxu.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    ret
  %a = tail call <2 x i64> @llvm.umax.v2i64(<2 x i64> %0, <2 x i64> %1)
  %b = tail call <2 x i64> @llvm.umin.v2i64(<2 x i64> %0, <2 x i64> %1)
  %sub = sub <2 x i64> %a, %b
  ret <2 x i64> %sub
}

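; Same as umaxmin_v16i8, but with the umin operands commuted.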
define <16 x i8> @umaxmin_v16i8_com1(<16 x i8> %0, <16 x i8> %1) {
;
; CHECK-LABEL: umaxmin_v16i8_com1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vminu.vv v10, v8, v9
; CHECK-NEXT:    vmaxu.vv v8, v8, v9
; CHECK-NEXT:    vsub.vv v8, v8, v10
; CHECK-NEXT:    ret
  %a = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> %0, <16 x i8> %1)
  %b = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %1, <16 x i8> %0)
  %sub = sub <16 x i8> %a, %b
  ret <16 x i8> %sub
}

declare <8 x i8> @llvm.abs.v8i8(<8 x i8>, i1)
declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1)

declare <4 x i16> @llvm.abs.v4i16(<4 x i16>, i1)
declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1)
declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1)

declare <2 x i32> @llvm.abs.v2i32(<2 x i32>, i1)
declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1)

declare <2 x i64> @llvm.abs.v2i64(<2 x i64>, i1)
declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1)

declare <2 x i128> @llvm.abs.v2i128(<2 x i128>, i1)

declare <16 x i8> @llvm.smax.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.smax.v2i64(<2 x i64>, <2 x i64>)
declare <16 x i8> @llvm.smin.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.smin.v2i64(<2 x i64>, <2 x i64>)
declare <16 x i8> @llvm.umax.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.umax.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.umax.v2i64(<2 x i64>, <2 x i64>)
declare <16 x i8> @llvm.umin.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.umin.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
declare <2 x i64> @llvm.umin.v2i64(<2 x i64>, <2 x i64>)

;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32: {{.*}}
; RV64: {{.*}}