; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s

; There are at least 3 potential patterns corresponding to an unsigned saturated add: min, cmp with sum, cmp with not.
; Test each of those patterns with i8/i16/i32/i64.
; Test each of those with a constant operand and a variable operand.
; Test each of those with a 128-bit vector type.
;
; The three patterns, as written in the IR below (C is the addend, ~C its bitwise not):
;   using_min:        s = (x <u ~C) ? x : ~C ; r = s + C
;   using_cmp_sum:    a = x + C ; r = (x >u a)  ? -1 : a
;   using_cmp_notval: a = x + C ; r = (x >u ~C) ? -1 : a

; Scalar tests with a constant addend of 42.
; The compare constant -43 is ~42, so x + 42 wraps exactly when x >u -43.

define i8 @unsigned_sat_constant_i8_using_min(i8 %x) {
; CHECK-LABEL: unsigned_sat_constant_i8_using_min:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w9, w0, #0xff
; CHECK-NEXT:    mov w8, #-43 // =0xffffffd5
; CHECK-NEXT:    cmp w9, #213
; CHECK-NEXT:    csel w8, w0, w8, lo
; CHECK-NEXT:    add w0, w8, #42
; CHECK-NEXT:    ret
  %c = icmp ult i8 %x, -43
  %s = select i1 %c, i8 %x, i8 -43
  %r = add i8 %s, 42
  ret i8 %r
}

define i8 @unsigned_sat_constant_i8_using_cmp_sum(i8 %x) {
; CHECK-LABEL: unsigned_sat_constant_i8_using_cmp_sum:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w8, w0, #0xff
; CHECK-NEXT:    add w8, w8, #42
; CHECK-NEXT:    tst w8, #0x100
; CHECK-NEXT:    csinv w0, w8, wzr, eq
; CHECK-NEXT:    ret
  %a = add i8 %x, 42
  %c = icmp ugt i8 %x, %a
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i8 @unsigned_sat_constant_i8_using_cmp_notval(i8 %x) {
; CHECK-LABEL: unsigned_sat_constant_i8_using_cmp_notval:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w8, w0, #0xff
; CHECK-NEXT:    add w9, w0, #42
; CHECK-NEXT:    cmp w8, #213
; CHECK-NEXT:    csinv w0, w9, wzr, ls
; CHECK-NEXT:    ret
  %a = add i8 %x, 42
  %c = icmp ugt i8 %x, -43
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i16 @unsigned_sat_constant_i16_using_min(i16 %x) {
; CHECK-LABEL: unsigned_sat_constant_i16_using_min:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #65493 // =0xffd5
; CHECK-NEXT:    cmp w8, w0, uxth
; CHECK-NEXT:    mov w8, #-43 // =0xffffffd5
; CHECK-NEXT:    csel w8, w0, w8, hi
; CHECK-NEXT:    add w0, w8, #42
; CHECK-NEXT:    ret
  %c = icmp ult i16 %x, -43
  %s = select i1 %c, i16 %x, i16 -43
  %r = add i16 %s, 42
  ret i16 %r
}

define i16 @unsigned_sat_constant_i16_using_cmp_sum(i16 %x) {
; CHECK-LABEL: unsigned_sat_constant_i16_using_cmp_sum:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w8, w0, #0xffff
; CHECK-NEXT:    add w8, w8, #42
; CHECK-NEXT:    tst w8, #0x10000
; CHECK-NEXT:    csinv w0, w8, wzr, eq
; CHECK-NEXT:    ret
  %a = add i16 %x, 42
  %c = icmp ugt i16 %x, %a
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i16 @unsigned_sat_constant_i16_using_cmp_notval(i16 %x) {
; CHECK-LABEL: unsigned_sat_constant_i16_using_cmp_notval:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #65493 // =0xffd5
; CHECK-NEXT:    add w9, w0, #42
; CHECK-NEXT:    cmp w8, w0, uxth
; CHECK-NEXT:    csinv w0, w9, wzr, hs
; CHECK-NEXT:    ret
  %a = add i16 %x, 42
  %c = icmp ugt i16 %x, -43
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i32 @unsigned_sat_constant_i32_using_min(i32 %x) {
; CHECK-LABEL: unsigned_sat_constant_i32_using_min:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #-43 // =0xffffffd5
; CHECK-NEXT:    cmn w0, #43
; CHECK-NEXT:    csel w8, w0, w8, lo
; CHECK-NEXT:    add w0, w8, #42
; CHECK-NEXT:    ret
  %c = icmp ult i32 %x, -43
  %s = select i1 %c, i32 %x, i32 -43
  %r = add i32 %s, 42
  ret i32 %r
}

define i32 @unsigned_sat_constant_i32_using_cmp_sum(i32 %x) {
; CHECK-LABEL: unsigned_sat_constant_i32_using_cmp_sum:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adds w8, w0, #42
; CHECK-NEXT:    csinv w0, w8, wzr, lo
; CHECK-NEXT:    ret
  %a = add i32 %x, 42
  %c = icmp ugt i32 %x, %a
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i32 @unsigned_sat_constant_i32_using_cmp_notval(i32 %x) {
; CHECK-LABEL: unsigned_sat_constant_i32_using_cmp_notval:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adds w8, w0, #42
; CHECK-NEXT:    csinv w0, w8, wzr, lo
; CHECK-NEXT:    ret
  %a = add i32 %x, 42
  %c = icmp ugt i32 %x, -43
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i64 @unsigned_sat_constant_i64_using_min(i64 %x) {
; CHECK-LABEL: unsigned_sat_constant_i64_using_min:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x8, #-43 // =0xffffffffffffffd5
; CHECK-NEXT:    cmn x0, #43
; CHECK-NEXT:    csel x8, x0, x8, lo
; CHECK-NEXT:    add x0, x8, #42
; CHECK-NEXT:    ret
  %c = icmp ult i64 %x, -43
  %s = select i1 %c, i64 %x, i64 -43
  %r = add i64 %s, 42
  ret i64 %r
}

define i64 @unsigned_sat_constant_i64_using_cmp_sum(i64 %x) {
; CHECK-LABEL: unsigned_sat_constant_i64_using_cmp_sum:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adds x8, x0, #42
; CHECK-NEXT:    csinv x0, x8, xzr, lo
; CHECK-NEXT:    ret
  %a = add i64 %x, 42
  %c = icmp ugt i64 %x, %a
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}

define i64 @unsigned_sat_constant_i64_using_cmp_notval(i64 %x) {
; CHECK-LABEL: unsigned_sat_constant_i64_using_cmp_notval:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adds x8, x0, #42
; CHECK-NEXT:    csinv x0, x8, xzr, lo
; CHECK-NEXT:    ret
  %a = add i64 %x, 42
  %c = icmp ugt i64 %x, -43
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}

; Scalar tests with a variable addend %y.
; %noty (xor %y, -1) plays the role that the constant -43 plays above.

define i8 @unsigned_sat_variable_i8_using_min(i8 %x, i8 %y) {
; CHECK-LABEL: unsigned_sat_variable_i8_using_min:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w8, w0, #0xff
; CHECK-NEXT:    mvn w9, w1
; CHECK-NEXT:    cmp w8, w9, uxtb
; CHECK-NEXT:    csinv w8, w0, w1, lo
; CHECK-NEXT:    add w0, w8, w1
; CHECK-NEXT:    ret
  %noty = xor i8 %y, -1
  %c = icmp ult i8 %x, %noty
  %s = select i1 %c, i8 %x, i8 %noty
  %r = add i8 %s, %y
  ret i8 %r
}

define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {
; CHECK-LABEL: unsigned_sat_variable_i8_using_cmp_sum:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w8, w0, #0xff
; CHECK-NEXT:    add w8, w8, w1, uxtb
; CHECK-NEXT:    tst w8, #0x100
; CHECK-NEXT:    csinv w0, w8, wzr, eq
; CHECK-NEXT:    ret
  %a = add i8 %x, %y
  %c = icmp ugt i8 %x, %a
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i8 @unsigned_sat_variable_i8_using_cmp_notval(i8 %x, i8 %y) {
; CHECK-LABEL: unsigned_sat_variable_i8_using_cmp_notval:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w8, w1, #0xff
; CHECK-NEXT:    add w9, w0, w1
; CHECK-NEXT:    add w8, w8, w0, uxtb
; CHECK-NEXT:    tst w8, #0x100
; CHECK-NEXT:    csinv w0, w9, wzr, eq
; CHECK-NEXT:    ret
  %noty = xor i8 %y, -1
  %a = add i8 %x, %y
  %c = icmp ugt i8 %x, %noty
  %r = select i1 %c, i8 -1, i8 %a
  ret i8 %r
}

define i16 @unsigned_sat_variable_i16_using_min(i16 %x, i16 %y) {
; CHECK-LABEL: unsigned_sat_variable_i16_using_min:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w8, w0, #0xffff
; CHECK-NEXT:    mvn w9, w1
; CHECK-NEXT:    cmp w8, w9, uxth
; CHECK-NEXT:    csinv w8, w0, w1, lo
; CHECK-NEXT:    add w0, w8, w1
; CHECK-NEXT:    ret
  %noty = xor i16 %y, -1
  %c = icmp ult i16 %x, %noty
  %s = select i1 %c, i16 %x, i16 %noty
  %r = add i16 %s, %y
  ret i16 %r
}

define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) {
; CHECK-LABEL: unsigned_sat_variable_i16_using_cmp_sum:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w8, w0, #0xffff
; CHECK-NEXT:    add w8, w8, w1, uxth
; CHECK-NEXT:    tst w8, #0x10000
; CHECK-NEXT:    csinv w0, w8, wzr, eq
; CHECK-NEXT:    ret
  %a = add i16 %x, %y
  %c = icmp ugt i16 %x, %a
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i16 @unsigned_sat_variable_i16_using_cmp_notval(i16 %x, i16 %y) {
; CHECK-LABEL: unsigned_sat_variable_i16_using_cmp_notval:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w8, w1, #0xffff
; CHECK-NEXT:    add w9, w0, w1
; CHECK-NEXT:    add w8, w8, w0, uxth
; CHECK-NEXT:    tst w8, #0x10000
; CHECK-NEXT:    csinv w0, w9, wzr, eq
; CHECK-NEXT:    ret
  %noty = xor i16 %y, -1
  %a = add i16 %x, %y
  %c = icmp ugt i16 %x, %noty
  %r = select i1 %c, i16 -1, i16 %a
  ret i16 %r
}

define i32 @unsigned_sat_variable_i32_using_min(i32 %x, i32 %y) {
; CHECK-LABEL: unsigned_sat_variable_i32_using_min:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvn w8, w1
; CHECK-NEXT:    cmp w0, w8
; CHECK-NEXT:    csinv w8, w0, w1, lo
; CHECK-NEXT:    add w0, w8, w1
; CHECK-NEXT:    ret
  %noty = xor i32 %y, -1
  %c = icmp ult i32 %x, %noty
  %s = select i1 %c, i32 %x, i32 %noty
  %r = add i32 %s, %y
  ret i32 %r
}

define i32 @unsigned_sat_variable_i32_using_cmp_sum(i32 %x, i32 %y) {
; CHECK-LABEL: unsigned_sat_variable_i32_using_cmp_sum:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adds w8, w0, w1
; CHECK-NEXT:    csinv w0, w8, wzr, lo
; CHECK-NEXT:    ret
  %a = add i32 %x, %y
  %c = icmp ugt i32 %x, %a
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i32 @unsigned_sat_variable_i32_using_cmp_notval(i32 %x, i32 %y) {
; CHECK-LABEL: unsigned_sat_variable_i32_using_cmp_notval:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add w8, w0, w1
; CHECK-NEXT:    cmn w1, w0
; CHECK-NEXT:    csinv w0, w8, wzr, lo
; CHECK-NEXT:    ret
  %noty = xor i32 %y, -1
  %a = add i32 %x, %y
  %c = icmp ugt i32 %x, %noty
  %r = select i1 %c, i32 -1, i32 %a
  ret i32 %r
}

define i64 @unsigned_sat_variable_i64_using_min(i64 %x, i64 %y) {
; CHECK-LABEL: unsigned_sat_variable_i64_using_min:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvn x8, x1
; CHECK-NEXT:    cmp x0, x8
; CHECK-NEXT:    csinv x8, x0, x1, lo
; CHECK-NEXT:    add x0, x8, x1
; CHECK-NEXT:    ret
  %noty = xor i64 %y, -1
  %c = icmp ult i64 %x, %noty
  %s = select i1 %c, i64 %x, i64 %noty
  %r = add i64 %s, %y
  ret i64 %r
}

define i64 @unsigned_sat_variable_i64_using_cmp_sum(i64 %x, i64 %y) {
; CHECK-LABEL: unsigned_sat_variable_i64_using_cmp_sum:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adds x8, x0, x1
; CHECK-NEXT:    csinv x0, x8, xzr, lo
; CHECK-NEXT:    ret
  %a = add i64 %x, %y
  %c = icmp ugt i64 %x, %a
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}

define i64 @unsigned_sat_variable_i64_using_cmp_notval(i64 %x, i64 %y) {
; CHECK-LABEL: unsigned_sat_variable_i64_using_cmp_notval:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add x8, x0, x1
; CHECK-NEXT:    cmn x1, x0
; CHECK-NEXT:    csinv x0, x8, xzr, lo
; CHECK-NEXT:    ret
  %noty = xor i64 %y, -1
  %a = add i64 %x, %y
  %c = icmp ugt i64 %x, %noty
  %r = select i1 %c, i64 -1, i64 %a
  ret i64 %r
}

; 128-bit vector tests with a constant (splat 42) addend.
; As in the scalar tests, the splat of -43 is the bitwise not of the addend.

define <16 x i8> @unsigned_sat_constant_v16i8_using_min(<16 x i8> %x) {
; CHECK-LABEL: unsigned_sat_constant_v16i8_using_min:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.16b, #213
; CHECK-NEXT:    movi v2.16b, #42
; CHECK-NEXT:    umin v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    add v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    ret
  %c = icmp ult <16 x i8> %x, <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
  %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
  %r = add <16 x i8> %s, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_constant_v16i8_using_cmp_sum(<16 x i8> %x) {
; CHECK-LABEL: unsigned_sat_constant_v16i8_using_cmp_sum:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.16b, #42
; CHECK-NEXT:    uqadd v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  %c = icmp ugt <16 x i8> %x, %a
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_constant_v16i8_using_cmp_notval(<16 x i8> %x) {
; CHECK-LABEL: unsigned_sat_constant_v16i8_using_cmp_notval:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.16b, #42
; CHECK-NEXT:    uqadd v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
  %c = icmp ugt <16 x i8> %x, <i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43, i8 -43>
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <8 x i16> @unsigned_sat_constant_v8i16_using_min(<8 x i16> %x) {
; CHECK-LABEL: unsigned_sat_constant_v8i16_using_min:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvni v1.8h, #42
; CHECK-NEXT:    movi v2.8h, #42
; CHECK-NEXT:    umin v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    add v0.8h, v0.8h, v2.8h
; CHECK-NEXT:    ret
  %c = icmp ult <8 x i16> %x, <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
  %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
  %r = add <8 x i16> %s, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_constant_v8i16_using_cmp_sum(<8 x i16> %x) {
; CHECK-LABEL: unsigned_sat_constant_v8i16_using_cmp_sum:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #42
; CHECK-NEXT:    uqadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
  %c = icmp ugt <8 x i16> %x, %a
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_constant_v8i16_using_cmp_notval(<8 x i16> %x) {
; CHECK-LABEL: unsigned_sat_constant_v8i16_using_cmp_notval:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #42
; CHECK-NEXT:    uqadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
  %c = icmp ugt <8 x i16> %x, <i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43, i16 -43>
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <4 x i32> @unsigned_sat_constant_v4i32_using_min(<4 x i32> %x) {
; CHECK-LABEL: unsigned_sat_constant_v4i32_using_min:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvni v1.4s, #42
; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    movi v1.4s, #42
; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %c = icmp ult <4 x i32> %x, <i32 -43, i32 -43, i32 -43, i32 -43>
  %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> <i32 -43, i32 -43, i32 -43, i32 -43>
  %r = add <4 x i32> %s, <i32 42, i32 42, i32 42, i32 42>
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_sum(<4 x i32> %x) {
; CHECK-LABEL: unsigned_sat_constant_v4i32_using_cmp_sum:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.4s, #42
; CHECK-NEXT:    uqadd v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
  %c = icmp ugt <4 x i32> %x, %a
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_notval(<4 x i32> %x) {
; CHECK-LABEL: unsigned_sat_constant_v4i32_using_cmp_notval:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.4s, #42
; CHECK-NEXT:    uqadd v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
  %c = icmp ugt <4 x i32> %x, <i32 -43, i32 -43, i32 -43, i32 -43>
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <2 x i64> @unsigned_sat_constant_v2i64_using_min(<2 x i64> %x) {
; CHECK-LABEL: unsigned_sat_constant_v2i64_using_min:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x8, #-43 // =0xffffffffffffffd5
; CHECK-NEXT:    dup v1.2d, x8
; CHECK-NEXT:    mov w8, #42 // =0x2a
; CHECK-NEXT:    cmhi v2.2d, v1.2d, v0.2d
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    dup v1.2d, x8
; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %c = icmp ult <2 x i64> %x, <i64 -43, i64 -43>
  %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> <i64 -43, i64 -43>
  %r = add <2 x i64> %s, <i64 42, i64 42>
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_sum(<2 x i64> %x) {
; CHECK-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #42 // =0x2a
; CHECK-NEXT:    dup v1.2d, x8
; CHECK-NEXT:    uqadd v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %a = add <2 x i64> %x, <i64 42, i64 42>
  %c = icmp ugt <2 x i64> %x, %a
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_notval(<2 x i64> %x) {
; CHECK-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #42 // =0x2a
; CHECK-NEXT:    dup v1.2d, x8
; CHECK-NEXT:    uqadd v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %a = add <2 x i64> %x, <i64 42, i64 42>
  %c = icmp ugt <2 x i64> %x, <i64 -43, i64 -43>
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}

; 128-bit vector tests with a variable addend %y.
; %noty is %y xor'ed with an all-ones splat, i.e. the bitwise not of %y.

define <16 x i8> @unsigned_sat_variable_v16i8_using_min(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: unsigned_sat_variable_v16i8_using_min:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvn v2.16b, v1.16b
; CHECK-NEXT:    umin v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    add v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %noty = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %c = icmp ult <16 x i8> %x, %noty
  %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %noty
  %r = add <16 x i8> %s, %y
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_variable_v16i8_using_cmp_sum(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: unsigned_sat_variable_v16i8_using_cmp_sum:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %a = add <16 x i8> %x, %y
  %c = icmp ugt <16 x i8> %x, %a
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <16 x i8> @unsigned_sat_variable_v16i8_using_cmp_notval(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: unsigned_sat_variable_v16i8_using_cmp_notval:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvn v2.16b, v1.16b
; CHECK-NEXT:    add v1.16b, v0.16b, v1.16b
; CHECK-NEXT:    cmhi v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT:    ret
  %noty = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %a = add <16 x i8> %x, %y
  %c = icmp ugt <16 x i8> %x, %noty
  %r = select <16 x i1> %c, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %a
  ret <16 x i8> %r
}

define <8 x i16> @unsigned_sat_variable_v8i16_using_min(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: unsigned_sat_variable_v8i16_using_min:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvn v2.16b, v1.16b
; CHECK-NEXT:    umin v0.8h, v0.8h, v2.8h
; CHECK-NEXT:    add v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %noty = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %c = icmp ult <8 x i16> %x, %noty
  %s = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %noty
  %r = add <8 x i16> %s, %y
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_variable_v8i16_using_cmp_sum(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: unsigned_sat_variable_v8i16_using_cmp_sum:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %a = add <8 x i16> %x, %y
  %c = icmp ugt <8 x i16> %x, %a
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <8 x i16> @unsigned_sat_variable_v8i16_using_cmp_notval(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: unsigned_sat_variable_v8i16_using_cmp_notval:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvn v2.16b, v1.16b
; CHECK-NEXT:    add v1.8h, v0.8h, v1.8h
; CHECK-NEXT:    cmhi v0.8h, v0.8h, v2.8h
; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT:    ret
  %noty = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %a = add <8 x i16> %x, %y
  %c = icmp ugt <8 x i16> %x, %noty
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

define <4 x i32> @unsigned_sat_variable_v4i32_using_min(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: unsigned_sat_variable_v4i32_using_min:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvn v2.16b, v1.16b
; CHECK-NEXT:    umin v0.4s, v0.4s, v2.4s
; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %c = icmp ult <4 x i32> %x, %noty
  %s = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %noty
  %r = add <4 x i32> %s, %y
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_variable_v4i32_using_cmp_sum(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %a = add <4 x i32> %x, %y
  %c = icmp ugt <4 x i32> %x, %a
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <4 x i32> @unsigned_sat_variable_v4i32_using_cmp_notval(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: unsigned_sat_variable_v4i32_using_cmp_notval:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvn v2.16b, v1.16b
; CHECK-NEXT:    add v1.4s, v0.4s, v1.4s
; CHECK-NEXT:    cmhi v0.4s, v0.4s, v2.4s
; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT:    ret
  %noty = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = add <4 x i32> %x, %y
  %c = icmp ugt <4 x i32> %x, %noty
  %r = select <4 x i1> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %a
  ret <4 x i32> %r
}

define <2 x i64> @unsigned_sat_variable_v2i64_using_min(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: unsigned_sat_variable_v2i64_using_min:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvn v2.16b, v1.16b
; CHECK-NEXT:    cmhi v3.2d, v2.2d, v0.2d
; CHECK-NEXT:    bif v0.16b, v2.16b, v3.16b
; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %noty = xor <2 x i64> %y, <i64 -1, i64 -1>
  %c = icmp ult <2 x i64> %x, %noty
  %s = select <2 x i1> %c, <2 x i64> %x, <2 x i64> %noty
  %r = add <2 x i64> %s, %y
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_variable_v2i64_using_cmp_sum(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %a = add <2 x i64> %x, %y
  %c = icmp ugt <2 x i64> %x, %a
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}

define <2 x i64> @unsigned_sat_variable_v2i64_using_cmp_notval(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: unsigned_sat_variable_v2i64_using_cmp_notval:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvn v2.16b, v1.16b
; CHECK-NEXT:    add v1.2d, v0.2d, v1.2d
; CHECK-NEXT:    cmhi v0.2d, v0.2d, v2.2d
; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT:    ret
  %noty = xor <2 x i64> %y, <i64 -1, i64 -1>
  %a = add <2 x i64> %x, %y
  %c = icmp ugt <2 x i64> %x, %noty
  %r = select <2 x i1> %c, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %a
  ret <2 x i64> %r
}
