; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s

; https://bugs.llvm.org/show_bug.cgi?id=38149

; We truncate from a wider width and then sign-extend back to the original
; width, and then compare the original and the round-tripped value for
; inequality. If they do not match, the value was changed by the signed
; truncation, i.e. it does not fit into the narrower signed type.

; This can be expressed in several ways in IR:
;   trunc + sext + icmp ne <- not canonical
;   shl + ashr + icmp ne
;   add + icmp ult/ule
;   add + icmp uge/ugt
; However, only the simplest form (the one with two shifts) gets lowered best.
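; For reference, the not-canonical trunc + sext form listed above would look
; roughly like this for the i16 -> i8 case (an illustrative sketch only; it is
; not exercised by the checks in this file, and the value names are made up):
;   %t = trunc i16 %x to i8
;   %r = sext i8 %t to i16
;   %c = icmp ne i16 %r, %x
; Here %c is true exactly when %x does not fit into a signed i8, i.e. when the
; truncation loses information.
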
; ---------------------------------------------------------------------------- ;
; shl + ashr + icmp ne
; ---------------------------------------------------------------------------- ;

define i1 @shifts_necmp_i16_i8(i16 %x) nounwind {
; CHECK-LABEL: shifts_necmp_i16_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sxtb w8, w0
; CHECK-NEXT:    and w8, w8, #0xffff
; CHECK-NEXT:    cmp w8, w0, uxth
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %tmp0 = shl i16 %x, 8 ; 16-8
  %tmp1 = ashr exact i16 %tmp0, 8 ; 16-8
  %tmp2 = icmp ne i16 %tmp1, %x
  ret i1 %tmp2
}

define i1 @shifts_necmp_i32_i16(i32 %x) nounwind {
; CHECK-LABEL: shifts_necmp_i32_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp w0, w0, sxth
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %tmp0 = shl i32 %x, 16 ; 32-16
  %tmp1 = ashr exact i32 %tmp0, 16 ; 32-16
  %tmp2 = icmp ne i32 %tmp1, %x
  ret i1 %tmp2
}

define i1 @shifts_necmp_i32_i8(i32 %x) nounwind {
; CHECK-LABEL: shifts_necmp_i32_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp w0, w0, sxtb
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %tmp0 = shl i32 %x, 24 ; 32-8
  %tmp1 = ashr exact i32 %tmp0, 24 ; 32-8
  %tmp2 = icmp ne i32 %tmp1, %x
  ret i1 %tmp2
}

define i1 @shifts_necmp_i64_i32(i64 %x) nounwind {
; CHECK-LABEL: shifts_necmp_i64_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, w0, sxtw
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %tmp0 = shl i64 %x, 32 ; 64-32
  %tmp1 = ashr exact i64 %tmp0, 32 ; 64-32
  %tmp2 = icmp ne i64 %tmp1, %x
  ret i1 %tmp2
}

define i1 @shifts_necmp_i64_i16(i64 %x) nounwind {
; CHECK-LABEL: shifts_necmp_i64_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, w0, sxth
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %tmp0 = shl i64 %x, 48 ; 64-16
  %tmp1 = ashr exact i64 %tmp0, 48 ; 64-16
  %tmp2 = icmp ne i64 %tmp1, %x
  ret i1 %tmp2
}

define i1 @shifts_necmp_i64_i8(i64 %x) nounwind {
; CHECK-LABEL: shifts_necmp_i64_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, w0, sxtb
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %tmp0 = shl i64 %x, 56 ; 64-8
  %tmp1 = ashr exact i64 %tmp0, 56 ; 64-8
  %tmp2 = icmp ne i64 %tmp1, %x
  ret i1 %tmp2
}

; ---------------------------------------------------------------------------- ;
; add + icmp ult
; ---------------------------------------------------------------------------- ;

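; The add + icmp ult form below encodes the same predicate arithmetically.
; A sketch of the reasoning, using the i16 -> i8 case as an example:
;   %x fits into a signed i8  iff  -128 <= %x <= 127  iff  (%x + 128) u< 256
; so subtracting 128 instead (an add of ~0U << (8-1)) and comparing
; u< ~0U << 8 is true exactly when %x does NOT fit, which matches the
; icmp ne lowering checked below.
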
define i1 @add_ultcmp_i16_i8(i16 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i16_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w8, w0, #0xffff
; CHECK-NEXT:    sub w8, w8, #128
; CHECK-NEXT:    lsr w8, w8, #8
; CHECK-NEXT:    cmp w8, #255
; CHECK-NEXT:    cset w0, lo
; CHECK-NEXT:    ret
  %tmp0 = add i16 %x, -128 ; ~0U << (8-1)
  %tmp1 = icmp ult i16 %tmp0, -256 ; ~0U << 8
  ret i1 %tmp1
}

define i1 @add_ultcmp_i32_i16(i32 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i32_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp w0, w0, sxth
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %tmp0 = add i32 %x, -32768 ; ~0U << (16-1)
  %tmp1 = icmp ult i32 %tmp0, -65536 ; ~0U << 16
  ret i1 %tmp1
}

define i1 @add_ultcmp_i32_i8(i32 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i32_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp w0, w0, sxtb
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %tmp0 = add i32 %x, -128 ; ~0U << (8-1)
  %tmp1 = icmp ult i32 %tmp0, -256 ; ~0U << 8
  ret i1 %tmp1
}

define i1 @add_ultcmp_i64_i32(i64 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i64_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, w0, sxtw
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %tmp0 = add i64 %x, -2147483648 ; ~0U << (32-1)
  %tmp1 = icmp ult i64 %tmp0, -4294967296 ; ~0U << 32
  ret i1 %tmp1
}

define i1 @add_ultcmp_i64_i16(i64 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i64_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, w0, sxth
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %tmp0 = add i64 %x, -32768 ; ~0U << (16-1)
  %tmp1 = icmp ult i64 %tmp0, -65536 ; ~0U << 16
  ret i1 %tmp1
}

define i1 @add_ultcmp_i64_i8(i64 %x) nounwind {
; CHECK-LABEL: add_ultcmp_i64_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, w0, sxtb
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %tmp0 = add i64 %x, -128 ; ~0U << (8-1)
  %tmp1 = icmp ult i64 %tmp0, -256 ; ~0U << 8
  ret i1 %tmp1
}

; Slightly more canonical variant
define i1 @add_ulecmp_i16_i8(i16 %x) nounwind {
; CHECK-LABEL: add_ulecmp_i16_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w8, w0, #0xffff
; CHECK-NEXT:    sub w8, w8, #128
; CHECK-NEXT:    lsr w8, w8, #8
; CHECK-NEXT:    cmp w8, #255
; CHECK-NEXT:    cset w0, lo
; CHECK-NEXT:    ret
  %tmp0 = add i16 %x, -128 ; ~0U << (8-1)
  %tmp1 = icmp ule i16 %tmp0, -257 ; (~0U << 8) - 1
  ret i1 %tmp1
}

; ---------------------------------------------------------------------------- ;
; add + icmp uge
; ---------------------------------------------------------------------------- ;

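; The add + icmp uge form below uses the positive constants directly.
; A sketch of the reasoning, again for the i16 -> i8 case:
;   %x fits into a signed i8  iff  -128 <= %x <= 127  iff  (%x + 128) u< 256
; so `add %x, 1U << (8-1)` followed by `icmp uge ..., 1U << 8` is true exactly
; when %x does NOT fit, again matching the icmp ne lowering checked below.
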
define i1 @add_ugecmp_i16_i8(i16 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i16_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sxtb w8, w0
; CHECK-NEXT:    and w8, w8, #0xffff
; CHECK-NEXT:    cmp w8, w0, uxth
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %tmp0 = add i16 %x, 128 ; 1U << (8-1)
  %tmp1 = icmp uge i16 %tmp0, 256 ; 1U << 8
  ret i1 %tmp1
}

define i1 @add_ugecmp_i32_i16(i32 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i32_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp w0, w0, sxth
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %tmp0 = add i32 %x, 32768 ; 1U << (16-1)
  %tmp1 = icmp uge i32 %tmp0, 65536 ; 1U << 16
  ret i1 %tmp1
}

define i1 @add_ugecmp_i32_i8(i32 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i32_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp w0, w0, sxtb
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %tmp0 = add i32 %x, 128 ; 1U << (8-1)
  %tmp1 = icmp uge i32 %tmp0, 256 ; 1U << 8
  ret i1 %tmp1
}

define i1 @add_ugecmp_i64_i32(i64 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i64_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, w0, sxtw
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %tmp0 = add i64 %x, 2147483648 ; 1U << (32-1)
  %tmp1 = icmp uge i64 %tmp0, 4294967296 ; 1U << 32
  ret i1 %tmp1
}

define i1 @add_ugecmp_i64_i16(i64 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i64_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, w0, sxth
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %tmp0 = add i64 %x, 32768 ; 1U << (16-1)
  %tmp1 = icmp uge i64 %tmp0, 65536 ; 1U << 16
  ret i1 %tmp1
}

define i1 @add_ugecmp_i64_i8(i64 %x) nounwind {
; CHECK-LABEL: add_ugecmp_i64_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, w0, sxtb
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %tmp0 = add i64 %x, 128 ; 1U << (8-1)
  %tmp1 = icmp uge i64 %tmp0, 256 ; 1U << 8
  ret i1 %tmp1
}

; Slightly more canonical variant
define i1 @add_ugtcmp_i16_i8(i16 %x) nounwind {
; CHECK-LABEL: add_ugtcmp_i16_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sxtb w8, w0
; CHECK-NEXT:    and w8, w8, #0xffff
; CHECK-NEXT:    cmp w8, w0, uxth
; CHECK-NEXT:    cset w0, ne
; CHECK-NEXT:    ret
  %tmp0 = add i16 %x, 128 ; 1U << (8-1)
  %tmp1 = icmp ugt i16 %tmp0, 255 ; (1U << 8) - 1
  ret i1 %tmp1
}

; Negative tests
; ---------------------------------------------------------------------------- ;

; Adding a non-constant
define i1 @add_ugecmp_bad_i16_i8_add(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: add_ugecmp_bad_i16_i8_add:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add w8, w0, w1
; CHECK-NEXT:    and w8, w8, #0xffff
; CHECK-NEXT:    cmp w8, #255
; CHECK-NEXT:    cset w0, hi
; CHECK-NEXT:    ret
  %tmp0 = add i16 %x, %y
  %tmp1 = icmp uge i16 %tmp0, 256 ; 1U << 8
  ret i1 %tmp1
}

; Comparing against a non-constant
define i1 @add_ugecmp_bad_i16_i8_cmp(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: add_ugecmp_bad_i16_i8_cmp:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add w8, w0, #128
; CHECK-NEXT:    and w8, w8, #0xffff
; CHECK-NEXT:    cmp w8, w1, uxth
; CHECK-NEXT:    cset w0, hs
; CHECK-NEXT:    ret
  %tmp0 = add i16 %x, 128 ; 1U << (8-1)
  %tmp1 = icmp uge i16 %tmp0, %y
  ret i1 %tmp1
}

; Second constant is not larger than the first one
define i1 @add_ugecmp_bad_i8_i16(i16 %x) nounwind {
; CHECK-LABEL: add_ugecmp_bad_i8_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add w8, w0, #128
; CHECK-NEXT:    and w8, w8, #0xffff
; CHECK-NEXT:    cmp w8, #127
; CHECK-NEXT:    cset w0, hi
; CHECK-NEXT:    ret
  %tmp0 = add i16 %x, 128 ; 1U << (8-1)
  %tmp1 = icmp uge i16 %tmp0, 128 ; 1U << (8-1)
  ret i1 %tmp1
}

; First constant is not a power of two
define i1 @add_ugecmp_bad_i16_i8_c0notpoweroftwo(i16 %x) nounwind {
; CHECK-LABEL: add_ugecmp_bad_i16_i8_c0notpoweroftwo:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add w8, w0, #192
; CHECK-NEXT:    and w8, w8, #0xffff
; CHECK-NEXT:    cmp w8, #255
; CHECK-NEXT:    cset w0, hi
; CHECK-NEXT:    ret
  %tmp0 = add i16 %x, 192 ; (1U << (8-1)) + (1U << (8-1-1))
  %tmp1 = icmp uge i16 %tmp0, 256 ; 1U << 8
  ret i1 %tmp1
}

; Second constant is not a power of two
define i1 @add_ugecmp_bad_i16_i8_c1notpoweroftwo(i16 %x) nounwind {
; CHECK-LABEL: add_ugecmp_bad_i16_i8_c1notpoweroftwo:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add w8, w0, #128
; CHECK-NEXT:    and w8, w8, #0xffff
; CHECK-NEXT:    cmp w8, #767
; CHECK-NEXT:    cset w0, hi
; CHECK-NEXT:    ret
  %tmp0 = add i16 %x, 128 ; 1U << (8-1)
  %tmp1 = icmp uge i16 %tmp0, 768 ; (1U << 8) + (1U << (8+1))
  ret i1 %tmp1
}

; Magic check fails: 64 << 1 != 256
define i1 @add_ugecmp_bad_i16_i8_magic(i16 %x) nounwind {
; CHECK-LABEL: add_ugecmp_bad_i16_i8_magic:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add w8, w0, #64
; CHECK-NEXT:    and w8, w8, #0xffff
; CHECK-NEXT:    cmp w8, #255
; CHECK-NEXT:    cset w0, hi
; CHECK-NEXT:    ret
  %tmp0 = add i16 %x, 64 ; 1U << (8-1-1)
  %tmp1 = icmp uge i16 %tmp0, 256 ; 1U << 8
  ret i1 %tmp1
}

; Bad 'destination type'
define i1 @add_ugecmp_bad_i16_i4(i16 %x) nounwind {
; CHECK-LABEL: add_ugecmp_bad_i16_i4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add w8, w0, #8
; CHECK-NEXT:    and w8, w8, #0xffff
; CHECK-NEXT:    cmp w8, #15
; CHECK-NEXT:    cset w0, hi
; CHECK-NEXT:    ret
  %tmp0 = add i16 %x, 8 ; 1U << (4-1)
  %tmp1 = icmp uge i16 %tmp0, 16 ; 1U << 4
  ret i1 %tmp1
}

; Bad storage type
define i1 @add_ugecmp_bad_i24_i8(i24 %x) nounwind {
; CHECK-LABEL: add_ugecmp_bad_i24_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add w8, w0, #128
; CHECK-NEXT:    and w8, w8, #0xffffff
; CHECK-NEXT:    cmp w8, #255
; CHECK-NEXT:    cset w0, hi
; CHECK-NEXT:    ret
  %tmp0 = add i24 %x, 128 ; 1U << (8-1)
  %tmp1 = icmp uge i24 %tmp0, 256 ; 1U << 8
  ret i1 %tmp1
}

; Slightly more canonical variant
define i1 @add_ugtcmp_bad_i16_i8(i16 %x) nounwind {
; CHECK-LABEL: add_ugtcmp_bad_i16_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    ret
  %tmp0 = add i16 %x, 128 ; 1U << (8-1)
  %tmp1 = icmp ugt i16 %tmp0, -1 ; when we add 1 to it (to form uge), it wraps to 0
  ret i1 %tmp1
}