; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -global-isel -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefix=RV64I
; RUN: llc -mtriple=riscv64 -global-isel -mattr=+zbkb -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefix=RV64ZBKB

; Tests GlobalISel selection of the Zbkb bit-manipulation instructions
; (pack/packh and zext.h) on RV64, against a plain RV64I baseline.

; FIXME: Use packw
define signext i32 @pack_i32(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: pack_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 48
; RV64I-NEXT:    srli a0, a0, 48
; RV64I-NEXT:    slliw a1, a1, 16
; RV64I-NEXT:    or a0, a1, a0
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: pack_i32:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    zext.h a0, a0
; RV64ZBKB-NEXT:    slliw a1, a1, 16
; RV64ZBKB-NEXT:    or a0, a1, a0
; RV64ZBKB-NEXT:    ret
  %shl = and i32 %a, 65535
  %shl1 = shl i32 %b, 16
  %or = or i32 %shl1, %shl
  ret i32 %or
}

; FIXME: Use packw
define signext i32 @pack_i32_2(i16 zeroext %a, i16 zeroext %b) nounwind {
; RV64I-LABEL: pack_i32_2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a1, a1, 16
; RV64I-NEXT:    or a0, a1, a0
; RV64I-NEXT:    sext.w a0, a0
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: pack_i32_2:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    slli a1, a1, 16
; RV64ZBKB-NEXT:    or a0, a1, a0
; RV64ZBKB-NEXT:    sext.w a0, a0
; RV64ZBKB-NEXT:    ret
  %zexta = zext i16 %a to i32
  %zextb = zext i16 %b to i32
  %shl1 = shl i32 %zextb, 16
  %or = or i32 %shl1, %zexta
  ret i32 %or
}

; Test case where we don't have a sign_extend_inreg after the or.
; FIXME: Use packw
define signext i32 @pack_i32_3(i16 zeroext %0, i16 zeroext %1, i32 signext %2) {
; RV64I-LABEL: pack_i32_3:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 16
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    addw a0, a0, a2
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: pack_i32_3:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    slli a0, a0, 16
; RV64ZBKB-NEXT:    or a0, a0, a1
; RV64ZBKB-NEXT:    addw a0, a0, a2
; RV64ZBKB-NEXT:    ret
  %4 = zext i16 %0 to i32
  %5 = shl nuw i32 %4, 16
  %6 = zext i16 %1 to i32
  %7 = or i32 %5, %6
  %8 = add i32 %7, %2
  ret i32 %8
}

define i64 @pack_i64(i64 %a, i64 %b) nounwind {
; RV64I-LABEL: pack_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a0, a0, 32
; RV64I-NEXT:    slli a1, a1, 32
; RV64I-NEXT:    or a0, a1, a0
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: pack_i64:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    pack a0, a0, a1
; RV64ZBKB-NEXT:    ret
  %shl = and i64 %a, 4294967295
  %shl1 = shl i64 %b, 32
  %or = or i64 %shl1, %shl
  ret i64 %or
}

; FIXME: The slli+srli isn't needed with pack.
define i64 @pack_i64_2(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: pack_i64_2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    slli a1, a1, 32
; RV64I-NEXT:    srli a0, a0, 32
; RV64I-NEXT:    srli a1, a1, 32
; RV64I-NEXT:    slli a1, a1, 32
; RV64I-NEXT:    or a0, a1, a0
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: pack_i64_2:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    slli a1, a1, 32
; RV64ZBKB-NEXT:    srli a1, a1, 32
; RV64ZBKB-NEXT:    pack a0, a0, a1
; RV64ZBKB-NEXT:    ret
  %zexta = zext i32 %a to i64
  %zextb = zext i32 %b to i64
  %shl1 = shl i64 %zextb, 32
  %or = or i64 %shl1, %zexta
  ret i64 %or
}

define i64 @pack_i64_3(ptr %0, ptr %1) {
; RV64I-LABEL: pack_i64_3:
; RV64I:       # %bb.0:
; RV64I-NEXT:    lwu a0, 0(a0)
; RV64I-NEXT:    lwu a1, 0(a1)
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: pack_i64_3:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    lwu a0, 0(a0)
; RV64ZBKB-NEXT:    lwu a1, 0(a1)
; RV64ZBKB-NEXT:    pack a0, a1, a0
; RV64ZBKB-NEXT:    ret
  %3 = load i32, ptr %0, align 4
  %4 = zext i32 %3 to i64
  %5 = shl i64 %4, 32
  %6 = load i32, ptr %1, align 4
  %7 = zext i32 %6 to i64
  %8 = or i64 %5, %7
  ret i64 %8
}

; FIXME: Use packh
define signext i32 @packh_i32(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: packh_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    lui a2, 16
; RV64I-NEXT:    andi a0, a0, 255
; RV64I-NEXT:    addiw a2, a2, -256
; RV64I-NEXT:    slli a1, a1, 8
; RV64I-NEXT:    and a1, a1, a2
; RV64I-NEXT:    or a0, a1, a0
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: packh_i32:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    lui a2, 16
; RV64ZBKB-NEXT:    andi a0, a0, 255
; RV64ZBKB-NEXT:    addiw a2, a2, -256
; RV64ZBKB-NEXT:    slli a1, a1, 8
; RV64ZBKB-NEXT:    and a1, a1, a2
; RV64ZBKB-NEXT:    or a0, a1, a0
; RV64ZBKB-NEXT:    ret
  %and = and i32 %a, 255
  %and1 = shl i32 %b, 8
  %shl = and i32 %and1, 65280
  %or = or i32 %shl, %and
  ret i32 %or
}

define i32 @packh_i32_2(i32 %a, i32 %b) nounwind {
; RV64I-LABEL: packh_i32_2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    andi a0, a0, 255
; RV64I-NEXT:    andi a1, a1, 255
; RV64I-NEXT:    slli a1, a1, 8
; RV64I-NEXT:    or a0, a1, a0
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: packh_i32_2:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    packh a0, a0, a1
; RV64ZBKB-NEXT:    ret
  %and = and i32 %a, 255
  %and1 = and i32 %b, 255
  %shl = shl i32 %and1, 8
  %or = or i32 %shl, %and
  ret i32 %or
}

; FIXME: Use packh
define i64 @packh_i64(i64 %a, i64 %b) nounwind {
; RV64I-LABEL: packh_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    lui a2, 16
; RV64I-NEXT:    andi a0, a0, 255
; RV64I-NEXT:    addiw a2, a2, -256
; RV64I-NEXT:    slli a1, a1, 8
; RV64I-NEXT:    and a1, a1, a2
; RV64I-NEXT:    or a0, a1, a0
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: packh_i64:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    lui a2, 16
; RV64ZBKB-NEXT:    andi a0, a0, 255
; RV64ZBKB-NEXT:    addiw a2, a2, -256
; RV64ZBKB-NEXT:    slli a1, a1, 8
; RV64ZBKB-NEXT:    and a1, a1, a2
; RV64ZBKB-NEXT:    or a0, a1, a0
; RV64ZBKB-NEXT:    ret
  %and = and i64 %a, 255
  %and1 = shl i64 %b, 8
  %shl = and i64 %and1, 65280
  %or = or i64 %shl, %and
  ret i64 %or
}

define i64 @packh_i64_2(i64 %a, i64 %b) nounwind {
; RV64I-LABEL: packh_i64_2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    andi a0, a0, 255
; RV64I-NEXT:    andi a1, a1, 255
; RV64I-NEXT:    slli a1, a1, 8
; RV64I-NEXT:    or a0, a1, a0
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: packh_i64_2:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    packh a0, a0, a1
; RV64ZBKB-NEXT:    ret
  %and = and i64 %a, 255
  %and1 = and i64 %b, 255
  %shl = shl i64 %and1, 8
  %or = or i64 %shl, %and
  ret i64 %or
}

define zeroext i16 @packh_i16(i8 zeroext %a, i8 zeroext %b) nounwind {
; RV64I-LABEL: packh_i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a1, a1, 8
; RV64I-NEXT:    or a0, a1, a0
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: packh_i16:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    packh a0, a0, a1
; RV64ZBKB-NEXT:    ret
  %zext = zext i8 %a to i16
  %zext1 = zext i8 %b to i16
  %shl = shl i16 %zext1, 8
  %or = or i16 %shl, %zext
  ret i16 %or
}

define zeroext i16 @packh_i16_2(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2) {
; RV64I-LABEL: packh_i16_2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    andi a0, a0, 255
; RV64I-NEXT:    slli a0, a0, 8
; RV64I-NEXT:    or a0, a0, a2
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: packh_i16_2:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    add a0, a1, a0
; RV64ZBKB-NEXT:    packh a0, a2, a0
; RV64ZBKB-NEXT:    ret
  %4 = add i8 %1, %0
  %5 = zext i8 %4 to i16
  %6 = shl i16 %5, 8
  %7 = zext i8 %2 to i16
  %8 = or i16 %6, %7
  ret i16 %8
}

define void @packh_i16_3(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2, ptr %p) {
; RV64I-LABEL: packh_i16_3:
; RV64I:       # %bb.0:
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    andi a0, a0, 255
; RV64I-NEXT:    slli a0, a0, 8
; RV64I-NEXT:    or a0, a0, a2
; RV64I-NEXT:    sh a0, 0(a3)
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: packh_i16_3:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    add a0, a1, a0
; RV64ZBKB-NEXT:    packh a0, a2, a0
; RV64ZBKB-NEXT:    sh a0, 0(a3)
; RV64ZBKB-NEXT:    ret
  %4 = add i8 %1, %0
  %5 = zext i8 %4 to i16
  %6 = shl i16 %5, 8
  %7 = zext i8 %2 to i16
  %8 = or i16 %6, %7
  store i16 %8, ptr %p
  ret void
}

define i64 @pack_i64_allWUsers(i32 signext %0, i32 signext %1, i32 signext %2) {
; RV64I-LABEL: pack_i64_allWUsers:
; RV64I:       # %bb.0:
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    slli a2, a2, 32
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a0, a0, 32
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a2, a2, 32
; RV64I-NEXT:    or a0, a0, a2
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: pack_i64_allWUsers:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    add a0, a1, a0
; RV64ZBKB-NEXT:    slli a0, a0, 32
; RV64ZBKB-NEXT:    srli a0, a0, 32
; RV64ZBKB-NEXT:    pack a0, a2, a0
; RV64ZBKB-NEXT:    ret
  %4 = add i32 %1, %0
  %5 = zext i32 %4 to i64
  %6 = shl i64 %5, 32
  %7 = zext i32 %2 to i64
  %8 = or i64 %6, %7
  ret i64 %8
}

define signext i32 @pack_i32_allWUsers(i16 zeroext %0, i16 zeroext %1, i16 zeroext %2) {
; RV64I-LABEL: pack_i32_allWUsers:
; RV64I:       # %bb.0:
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    slli a0, a0, 48
; RV64I-NEXT:    srli a0, a0, 48
; RV64I-NEXT:    slli a0, a0, 16
; RV64I-NEXT:    or a0, a0, a2
; RV64I-NEXT:    sext.w a0, a0
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: pack_i32_allWUsers:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    add a0, a1, a0
; RV64ZBKB-NEXT:    zext.h a0, a0
; RV64ZBKB-NEXT:    slli a0, a0, 16
; RV64ZBKB-NEXT:    or a0, a0, a2
; RV64ZBKB-NEXT:    sext.w a0, a0
; RV64ZBKB-NEXT:    ret
  %4 = add i16 %1, %0
  %5 = zext i16 %4 to i32
  %6 = shl i32 %5, 16
  %7 = zext i16 %2 to i32
  %8 = or i32 %6, %7
  ret i32 %8
}

define i64 @pack_i64_imm() {
; RV64I-LABEL: pack_i64_imm:
; RV64I:       # %bb.0:
; RV64I-NEXT:    lui a0, 65793
; RV64I-NEXT:    slli a0, a0, 4
; RV64I-NEXT:    addi a0, a0, 257
; RV64I-NEXT:    slli a0, a0, 16
; RV64I-NEXT:    addi a0, a0, 257
; RV64I-NEXT:    slli a0, a0, 12
; RV64I-NEXT:    addi a0, a0, 16
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: pack_i64_imm:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    lui a0, 65793
; RV64ZBKB-NEXT:    addi a0, a0, 16
; RV64ZBKB-NEXT:    pack a0, a0, a0
; RV64ZBKB-NEXT:    ret
  ret i64 1157442765409226768 ; 0x1010101010101010
}

define i32 @zexth_i32(i32 %a) nounwind {
; RV64I-LABEL: zexth_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 48
; RV64I-NEXT:    srli a0, a0, 48
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: zexth_i32:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    zext.h a0, a0
; RV64ZBKB-NEXT:    ret
  %and = and i32 %a, 65535
  ret i32 %and
}

define i64 @zexth_i64(i64 %a) nounwind {
; RV64I-LABEL: zexth_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 48
; RV64I-NEXT:    srli a0, a0, 48
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: zexth_i64:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    zext.h a0, a0
; RV64ZBKB-NEXT:    ret
  %and = and i64 %a, 65535
  ret i64 %and
}

define i32 @zext_i16_to_i32(i16 %a) nounwind {
; RV64I-LABEL: zext_i16_to_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 48
; RV64I-NEXT:    srli a0, a0, 48
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: zext_i16_to_i32:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    zext.h a0, a0
; RV64ZBKB-NEXT:    ret
  %1 = zext i16 %a to i32
  ret i32 %1
}

define i64 @zext_i16_to_i64(i16 %a) nounwind {
; RV64I-LABEL: zext_i16_to_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 48
; RV64I-NEXT:    srli a0, a0, 48
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: zext_i16_to_i64:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    zext.h a0, a0
; RV64ZBKB-NEXT:    ret
  %1 = zext i16 %a to i64
  ret i64 %1
}

; This creates a i16->i32 G_ZEXT that we need to be able to select
define i32 @zext_i16_i32_2(i1 %z, ptr %x, i32 %y) {
; RV64I-LABEL: zext_i16_i32_2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    andi a3, a0, 1
; RV64I-NEXT:    bnez a3, .LBB20_2
; RV64I-NEXT:  # %bb.1:
; RV64I-NEXT:    mv a0, a2
; RV64I-NEXT:    ret
; RV64I-NEXT:  .LBB20_2:
; RV64I-NEXT:    lh a0, 0(a1)
; RV64I-NEXT:    slli a0, a0, 48
; RV64I-NEXT:    srli a0, a0, 48
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: zext_i16_i32_2:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    andi a3, a0, 1
; RV64ZBKB-NEXT:    bnez a3, .LBB20_2
; RV64ZBKB-NEXT:  # %bb.1:
; RV64ZBKB-NEXT:    mv a0, a2
; RV64ZBKB-NEXT:    ret
; RV64ZBKB-NEXT:  .LBB20_2:
; RV64ZBKB-NEXT:    lh a0, 0(a1)
; RV64ZBKB-NEXT:    zext.h a0, a0
; RV64ZBKB-NEXT:    ret
  %w = load i16, ptr %x
  %a = freeze i16 %w
  %b = zext i16 %a to i32
  %c = select i1 %z, i32 %b, i32 %y
  ret i32 %c
}

; This creates a i16->i32 G_SEXT that we need to be able to select
define i32 @sext_i16_i32(i1 %z, ptr %x, i32 %y) {
; RV64I-LABEL: sext_i16_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    andi a3, a0, 1
; RV64I-NEXT:    bnez a3, .LBB21_2
; RV64I-NEXT:  # %bb.1:
; RV64I-NEXT:    mv a0, a2
; RV64I-NEXT:    ret
; RV64I-NEXT:  .LBB21_2:
; RV64I-NEXT:    lh a0, 0(a1)
; RV64I-NEXT:    slli a0, a0, 48
; RV64I-NEXT:    srai a0, a0, 48
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: sext_i16_i32:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    andi a3, a0, 1
; RV64ZBKB-NEXT:    bnez a3, .LBB21_2
; RV64ZBKB-NEXT:  # %bb.1:
; RV64ZBKB-NEXT:    mv a0, a2
; RV64ZBKB-NEXT:    ret
; RV64ZBKB-NEXT:  .LBB21_2:
; RV64ZBKB-NEXT:    lh a0, 0(a1)
; RV64ZBKB-NEXT:    slli a0, a0, 48
; RV64ZBKB-NEXT:    srai a0, a0, 48
; RV64ZBKB-NEXT:    ret
  %w = load i16, ptr %x
  %a = freeze i16 %w
  %b = sext i16 %a to i32
  %c = select i1 %z, i32 %b, i32 %y
  ret i32 %c
}