; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s
; RUN: llc -mtriple=aarch64_be-unknown-linux -o - %s | FileCheck --check-prefix=CHECK-BE %s
; RUN: llc -mtriple=aarch64_be-unknown-linux -aarch64-enable-ext-to-tbl=false -o - %s | FileCheck --check-prefix=CHECK-DISABLE %s

; CHECK-LABEL: lCPI0_0:
; CHECK-NEXT: .byte 0 ; 0x0
; CHECK-NEXT: .byte 4 ; 0x4
; CHECK-NEXT: .byte 8 ; 0x8
; CHECK-NEXT: .byte 12 ; 0xc
; CHECK-NEXT: .byte 16 ; 0x10
; CHECK-NEXT: .byte 20 ; 0x14
; CHECK-NEXT: .byte 24 ; 0x18
; CHECK-NEXT: .byte 28 ; 0x1c
; CHECK-NEXT: .byte 32 ; 0x20
; CHECK-NEXT: .byte 36 ; 0x24
; CHECK-NEXT: .byte 40 ; 0x28
; CHECK-NEXT: .byte 44 ; 0x2c
; CHECK-NEXT: .byte 48 ; 0x30
; CHECK-NEXT: .byte 52 ; 0x34
; CHECK-NEXT: .byte 56 ; 0x38
; CHECK-NEXT: .byte 60 ; 0x3c

; CHECK-BE-LABEL: .LCPI0_0:
; CHECK-BE-NEXT: .byte 3 // 0x3
; CHECK-BE-NEXT: .byte 7 // 0x7
; CHECK-BE-NEXT: .byte 11 // 0xb
; CHECK-BE-NEXT: .byte 15 // 0xf
; CHECK-BE-NEXT: .byte 19 // 0x13
; CHECK-BE-NEXT: .byte 23 // 0x17
; CHECK-BE-NEXT: .byte 27 // 0x1b
; CHECK-BE-NEXT: .byte 31 // 0x1f
; CHECK-BE-NEXT: .byte 35 // 0x23
; CHECK-BE-NEXT: .byte 39 // 0x27
; CHECK-BE-NEXT: .byte 43 // 0x2b
; CHECK-BE-NEXT: .byte 47 // 0x2f
; CHECK-BE-NEXT: .byte 51 // 0x33
; CHECK-BE-NEXT: .byte 55 // 0x37
; CHECK-BE-NEXT: .byte 59 // 0x3b
; CHECK-BE-NEXT: .byte 63 // 0x3f

; It's profitable to use a single tbl.4 instruction to lower the truncate.
define void @trunc_v16i32_to_v16i8_in_loop(ptr %A, ptr %dst) {
; CHECK-LABEL: trunc_v16i32_to_v16i8_in_loop:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: Lloh0:
; CHECK-NEXT: adrp x8, lCPI0_0@PAGE
; CHECK-NEXT: Lloh1:
; CHECK-NEXT: ldr q0, [x8, lCPI0_0@PAGEOFF]
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: LBB0_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: add x9, x0, x8, lsl #6
; CHECK-NEXT: ldp q1, q2, [x9]
; CHECK-NEXT: ldp q3, q4, [x9, #32]
; CHECK-NEXT: tbl.16b v1, { v1, v2, v3, v4 }, v0
; CHECK-NEXT: str q1, [x1, x8, lsl #4]
; CHECK-NEXT: add x8, x8, #1
; CHECK-NEXT: cmp x8, #1000
; CHECK-NEXT: b.eq LBB0_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1
;
; CHECK-BE-LABEL: trunc_v16i32_to_v16i8_in_loop:
; CHECK-BE: // %bb.0: // %entry
; CHECK-BE-NEXT: adrp x8, .LCPI0_0
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI0_0
; CHECK-BE-NEXT: ld1 { v0.16b }, [x8]
; CHECK-BE-NEXT: mov x8, xzr
; CHECK-BE-NEXT: .LBB0_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-BE-NEXT: add x9, x0, x8, lsl #6
; CHECK-BE-NEXT: add x10, x9, #16
; CHECK-BE-NEXT: ld1 { v1.16b }, [x9]
; CHECK-BE-NEXT: add x11, x9, #32
; CHECK-BE-NEXT: ld1 { v2.16b }, [x10]
; CHECK-BE-NEXT: add x9, x9, #48
; CHECK-BE-NEXT: ld1 { v3.16b }, [x11]
; CHECK-BE-NEXT: ld1 { v4.16b }, [x9]
; CHECK-BE-NEXT: add x9, x1, x8, lsl #4
; CHECK-BE-NEXT: add x8, x8, #1
; CHECK-BE-NEXT: cmp x8, #1000
; CHECK-BE-NEXT: tbl v1.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b
; CHECK-BE-NEXT: st1 { v1.16b }, [x9]
; CHECK-BE-NEXT: b.eq .LBB0_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
;
; CHECK-DISABLE-LABEL: trunc_v16i32_to_v16i8_in_loop:
; CHECK-DISABLE: // %bb.0: // %entry
; CHECK-DISABLE-NEXT: mov x8, xzr
; CHECK-DISABLE-NEXT: .LBB0_1: // %loop
; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #6
; CHECK-DISABLE-NEXT: ld1 { v0.4s }, [x9]
; CHECK-DISABLE-NEXT: add x10, x9, #16
; CHECK-DISABLE-NEXT: add x11, x9, #48
; CHECK-DISABLE-NEXT: add x9, x9, #32
; CHECK-DISABLE-NEXT: ld1 { v1.4s }, [x10]
; CHECK-DISABLE-NEXT: ld1 { v2.4s }, [x11]
; CHECK-DISABLE-NEXT: ld1 { v3.4s }, [x9]
; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #4
; CHECK-DISABLE-NEXT: add x8, x8, #1
; CHECK-DISABLE-NEXT: cmp x8, #1000
; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
; CHECK-DISABLE-NEXT: uzp1 v2.8h, v3.8h, v2.8h
; CHECK-DISABLE-NEXT: uzp1 v0.16b, v0.16b, v2.16b
; CHECK-DISABLE-NEXT: st1 { v0.16b }, [x9]
; CHECK-DISABLE-NEXT: b.eq .LBB0_1
; CHECK-DISABLE-NEXT: // %bb.2: // %exit
; CHECK-DISABLE-NEXT: ret
entry:
  br label %loop

; Per iteration: load the %idx-th <16 x i32> from %A, truncate it to
; <16 x i8>, and store it to the %idx-th <16 x i8> slot of %dst.
loop:
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  %src.ptr = getelementptr inbounds <16 x i32>, ptr %A, i64 %idx
  %wide = load <16 x i32>, ptr %src.ptr
  %narrow = trunc <16 x i32> %wide to <16 x i8>
  %dst.ptr = getelementptr inbounds <16 x i8>, ptr %dst, i64 %idx
  store <16 x i8> %narrow, ptr %dst.ptr
  %idx.next = add i64 %idx, 1
  ; The back-edge is taken when the incremented index equals 1000;
  ; any other value leaves the loop.
  %at.limit = icmp eq i64 %idx.next, 1000
  br i1 %at.limit, label %loop, label %exit

exit:
  ret void
}

; Not profitable to use tbl, as materializing the masks requires more
; instructions.
define void @trunc_v16i32_to_v16i8_no_loop(ptr %A, ptr %dst) {
; CHECK-LABEL: trunc_v16i32_to_v16i8_no_loop:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ldp q3, q2, [x0, #32]
; CHECK-NEXT: uzp1.8h v0, v1, v0
; CHECK-NEXT: uzp1.8h v2, v3, v2
; CHECK-NEXT: uzp1.16b v0, v0, v2
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
;
; CHECK-BE-LABEL: trunc_v16i32_to_v16i8_no_loop:
; CHECK-BE: // %bb.0: // %entry
; CHECK-BE-NEXT: add x8, x0, #16
; CHECK-BE-NEXT: add x9, x0, #48
; CHECK-BE-NEXT: add x10, x0, #32
; CHECK-BE-NEXT: ld1 { v0.4s }, [x0]
; CHECK-BE-NEXT: ld1 { v1.4s }, [x8]
; CHECK-BE-NEXT: ld1 { v2.4s }, [x9]
; CHECK-BE-NEXT: ld1 { v3.4s }, [x10]
; CHECK-BE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
; CHECK-BE-NEXT: uzp1 v2.8h, v3.8h, v2.8h
; CHECK-BE-NEXT: uzp1 v0.16b, v0.16b, v2.16b
; CHECK-BE-NEXT: st1 { v0.16b }, [x1]
; CHECK-BE-NEXT: ret
;
; CHECK-DISABLE-LABEL: trunc_v16i32_to_v16i8_no_loop:
; CHECK-DISABLE: // %bb.0: // %entry
; CHECK-DISABLE-NEXT: add x8, x0, #16
; CHECK-DISABLE-NEXT: add x9, x0, #48
; CHECK-DISABLE-NEXT: add x10, x0, #32
; CHECK-DISABLE-NEXT: ld1 { v0.4s }, [x0]
; CHECK-DISABLE-NEXT: ld1 { v1.4s }, [x8]
; CHECK-DISABLE-NEXT: ld1 { v2.4s }, [x9]
; CHECK-DISABLE-NEXT: ld1 { v3.4s }, [x10]
; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
; CHECK-DISABLE-NEXT: uzp1 v2.8h, v3.8h, v2.8h
; CHECK-DISABLE-NEXT: uzp1 v0.16b, v0.16b, v2.16b
; CHECK-DISABLE-NEXT: st1 { v0.16b }, [x1]
; CHECK-DISABLE-NEXT: ret
entry:
  ; Straight-line variant: one <16 x i32> load, one truncate, one store.
  %wide = load <16 x i32>, ptr %A
  %narrow = trunc <16 x i32> %wide to <16 x i8>
  store <16 x i8> %narrow, ptr %dst
  ret void
}


; CHECK-LABEL: lCPI2_0:
; CHECK-NEXT: .byte 0 ; 0x0
; CHECK-NEXT: .byte 4 ; 0x4
; CHECK-NEXT: .byte 8 ; 0x8
; CHECK-NEXT: .byte 12 ; 0xc
; CHECK-NEXT: .byte 16 ; 0x10
; CHECK-NEXT: .byte 20 ; 0x14
; CHECK-NEXT: .byte 24 ; 0x18
; CHECK-NEXT: .byte 28 ; 0x1c
; CHECK-NEXT: .byte 255 ; 0xff
; CHECK-NEXT: .byte 255 ; 0xff
; CHECK-NEXT: .byte 255 ; 0xff
; CHECK-NEXT: .byte 255 ; 0xff
; CHECK-NEXT: .byte 255 ; 0xff
; CHECK-NEXT: .byte 255 ; 0xff
; CHECK-NEXT: .byte 255 ; 0xff
; CHECK-NEXT: .byte 255 ; 0xff

; CHECK-BE-LABEL: .LCPI2_0:
; CHECK-BE-NEXT: .byte 3 // 0x3
; CHECK-BE-NEXT: .byte 7 // 0x7
; CHECK-BE-NEXT: .byte 11 // 0xb
; CHECK-BE-NEXT: .byte 15 // 0xf
; CHECK-BE-NEXT: .byte 19 // 0x13
; CHECK-BE-NEXT: .byte 23 // 0x17
; CHECK-BE-NEXT: .byte 27 // 0x1b
; CHECK-BE-NEXT: .byte 31 // 0x1f
; CHECK-BE-NEXT: .byte 255 // 0xff
; CHECK-BE-NEXT: .byte 255 // 0xff
; CHECK-BE-NEXT: .byte 255 // 0xff
; CHECK-BE-NEXT: .byte 255 // 0xff
; CHECK-BE-NEXT: .byte 255 // 0xff
; CHECK-BE-NEXT: .byte 255 // 0xff
; CHECK-BE-NEXT: .byte 255 // 0xff
; CHECK-BE-NEXT: .byte 255 // 0xff
; It's profitable to use a single tbl.2 instruction to lower the truncate.
define void @trunc_v8i32_to_v8i8_in_loop(ptr %A, ptr %dst) {
; CHECK-LABEL: trunc_v8i32_to_v8i8_in_loop:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: Lloh2:
; CHECK-NEXT: adrp x8, lCPI2_0@PAGE
; CHECK-NEXT: Lloh3:
; CHECK-NEXT: ldr q0, [x8, lCPI2_0@PAGEOFF]
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: LBB2_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: add x9, x0, x8, lsl #5
; CHECK-NEXT: ldp q1, q2, [x9]
; CHECK-NEXT: tbl.16b v1, { v1, v2 }, v0
; CHECK-NEXT: str d1, [x1, x8, lsl #3]
; CHECK-NEXT: add x8, x8, #1
; CHECK-NEXT: cmp x8, #1000
; CHECK-NEXT: b.eq LBB2_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh3
;
; CHECK-BE-LABEL: trunc_v8i32_to_v8i8_in_loop:
; CHECK-BE: // %bb.0: // %entry
; CHECK-BE-NEXT: adrp x8, .LCPI2_0
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI2_0
; CHECK-BE-NEXT: ld1 { v0.16b }, [x8]
; CHECK-BE-NEXT: mov x8, xzr
; CHECK-BE-NEXT: .LBB2_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-BE-NEXT: add x9, x0, x8, lsl #5
; CHECK-BE-NEXT: add x10, x9, #16
; CHECK-BE-NEXT: ld1 { v1.16b }, [x9]
; CHECK-BE-NEXT: add x9, x1, x8, lsl #3
; CHECK-BE-NEXT: ld1 { v2.16b }, [x10]
; CHECK-BE-NEXT: add x8, x8, #1
; CHECK-BE-NEXT: cmp x8, #1000
; CHECK-BE-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v0.16b
; CHECK-BE-NEXT: st1 { v1.8b }, [x9]
; CHECK-BE-NEXT: b.eq .LBB2_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
;
; CHECK-DISABLE-LABEL: trunc_v8i32_to_v8i8_in_loop:
; CHECK-DISABLE: // %bb.0: // %entry
; CHECK-DISABLE-NEXT: mov x8, xzr
; CHECK-DISABLE-NEXT: .LBB2_1: // %loop
; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #5
; CHECK-DISABLE-NEXT: add x10, x9, #16
; CHECK-DISABLE-NEXT: ld1 { v0.4s }, [x9]
; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #3
; CHECK-DISABLE-NEXT: ld1 { v1.4s }, [x10]
; CHECK-DISABLE-NEXT: add x8, x8, #1
; CHECK-DISABLE-NEXT: cmp x8, #1000
; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
; CHECK-DISABLE-NEXT: xtn v0.8b, v0.8h
; CHECK-DISABLE-NEXT: st1 { v0.8b }, [x9]
; CHECK-DISABLE-NEXT: b.eq .LBB2_1
; CHECK-DISABLE-NEXT: // %bb.2: // %exit
; CHECK-DISABLE-NEXT: ret
entry:
  br label %loop

; Per iteration: load the %idx-th <8 x i32> from %A, truncate it to
; <8 x i8>, and store it to the %idx-th <8 x i8> slot of %dst.
loop:
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  %src.ptr = getelementptr inbounds <8 x i32>, ptr %A, i64 %idx
  %wide = load <8 x i32>, ptr %src.ptr
  %narrow = trunc <8 x i32> %wide to <8 x i8>
  %dst.ptr = getelementptr inbounds <8 x i8>, ptr %dst, i64 %idx
  store <8 x i8> %narrow, ptr %dst.ptr
  %idx.next = add i64 %idx, 1
  ; The back-edge is taken when the incremented index equals 1000.
  %at.limit = icmp eq i64 %idx.next, 1000
  br i1 %at.limit, label %loop, label %exit

exit:
  ret void
}

; CHECK-LABEL: lCPI3_0:
; CHECK-NEXT: .byte 0 ; 0x0
; CHECK-NEXT: .byte 8 ; 0x8
; CHECK-NEXT: .byte 16 ; 0x10
; CHECK-NEXT: .byte 24 ; 0x18
; CHECK-NEXT: .byte 32 ; 0x20
; CHECK-NEXT: .byte 40 ; 0x28
; CHECK-NEXT: .byte 48 ; 0x30
; CHECK-NEXT: .byte 56 ; 0x38
; CHECK-NEXT: .byte 64 ; 0x40
; CHECK-NEXT: .byte 72 ; 0x48
; CHECK-NEXT: .byte 80 ; 0x50
; CHECK-NEXT: .byte 88 ; 0x58
; CHECK-NEXT: .byte 96 ; 0x60
; CHECK-NEXT: .byte 104 ; 0x68
; CHECK-NEXT: .byte 112 ; 0x70
; CHECK-NEXT: .byte 120 ; 0x78

; CHECK-BE-LABEL: .LCPI3_0:
; CHECK-BE-NEXT: .byte 7 // 0x7
; CHECK-BE-NEXT: .byte 15 // 0xf
; CHECK-BE-NEXT: .byte 23 // 0x17
; CHECK-BE-NEXT: .byte 31 // 0x1f
; CHECK-BE-NEXT: .byte 39 // 0x27
; CHECK-BE-NEXT: .byte 47 // 0x2f
; CHECK-BE-NEXT: .byte 55 // 0x37
; CHECK-BE-NEXT: .byte 63 // 0x3f
; CHECK-BE-NEXT: .byte 71 // 0x47
; CHECK-BE-NEXT: .byte 79 // 0x4f
; CHECK-BE-NEXT: .byte 87 // 0x57
; CHECK-BE-NEXT: .byte 95 // 0x5f
; CHECK-BE-NEXT: .byte 103 // 0x67
; CHECK-BE-NEXT: .byte 111 // 0x6f
; CHECK-BE-NEXT: .byte 119 // 0x77
; CHECK-BE-NEXT: .byte 127 // 0x7f
define void @trunc_v16i64_to_v16i8_in_loop(ptr %A, ptr %dst) {
; CHECK-LABEL: trunc_v16i64_to_v16i8_in_loop:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: Lloh4:
; CHECK-NEXT: adrp x8, lCPI3_0@PAGE
; CHECK-NEXT: Lloh5:
; CHECK-NEXT: ldr q0, [x8, lCPI3_0@PAGEOFF]
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: LBB3_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: add x9, x0, x8, lsl #7
; CHECK-NEXT: ldp q1, q2, [x9]
; CHECK-NEXT: ldp q16, q17, [x9, #64]
; CHECK-NEXT: ldp q3, q4, [x9, #32]
; CHECK-NEXT: ldp q18, q19, [x9, #96]
; CHECK-NEXT: tbl.16b v1, { v1, v2, v3, v4 }, v0
; CHECK-NEXT: tbl.16b v2, { v16, v17, v18, v19 }, v0
; CHECK-NEXT: mov.d v1[1], v2[0]
; CHECK-NEXT: str q1, [x1, x8, lsl #4]
; CHECK-NEXT: add x8, x8, #1
; CHECK-NEXT: cmp x8, #1000
; CHECK-NEXT: b.eq LBB3_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
; CHECK-NEXT: .loh AdrpLdr Lloh4, Lloh5
;
; CHECK-BE-LABEL: trunc_v16i64_to_v16i8_in_loop:
; CHECK-BE: // %bb.0: // %entry
; CHECK-BE-NEXT: adrp x8, .LCPI3_0
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI3_0
; CHECK-BE-NEXT: ld1 { v0.16b }, [x8]
; CHECK-BE-NEXT: mov x8, xzr
; CHECK-BE-NEXT: .LBB3_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-BE-NEXT: add x9, x0, x8, lsl #7
; CHECK-BE-NEXT: add x13, x9, #64
; CHECK-BE-NEXT: add x12, x9, #80
; CHECK-BE-NEXT: add x14, x9, #16
; CHECK-BE-NEXT: ld1 { v1.16b }, [x9]
; CHECK-BE-NEXT: ld1 { v16.16b }, [x13]
; CHECK-BE-NEXT: add x11, x9, #96
; CHECK-BE-NEXT: add x13, x9, #32
; CHECK-BE-NEXT: ld1 { v2.16b }, [x14]
; CHECK-BE-NEXT: ld1 { v17.16b }, [x12]
; CHECK-BE-NEXT: add x10, x9, #112
; CHECK-BE-NEXT: add x9, x9, #48
; CHECK-BE-NEXT: ld1 { v3.16b }, [x13]
; CHECK-BE-NEXT: ld1 { v18.16b }, [x11]
; CHECK-BE-NEXT: ld1 { v4.16b }, [x9]
; CHECK-BE-NEXT: add x9, x1, x8, lsl #4
; CHECK-BE-NEXT: ld1 { v19.16b }, [x10]
; CHECK-BE-NEXT: add x8, x8, #1
; CHECK-BE-NEXT: cmp x8, #1000
; CHECK-BE-NEXT: tbl v1.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b
; CHECK-BE-NEXT: tbl v2.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v0.16b
; CHECK-BE-NEXT: mov v1.d[1], v2.d[0]
; CHECK-BE-NEXT: st1 { v1.16b }, [x9]
; CHECK-BE-NEXT: b.eq .LBB3_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
;
; CHECK-DISABLE-LABEL: trunc_v16i64_to_v16i8_in_loop:
; CHECK-DISABLE: // %bb.0: // %entry
; CHECK-DISABLE-NEXT: mov x8, xzr
; CHECK-DISABLE-NEXT: .LBB3_1: // %loop
; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #7
; CHECK-DISABLE-NEXT: add x10, x9, #16
; CHECK-DISABLE-NEXT: add x11, x9, #48
; CHECK-DISABLE-NEXT: ld1 { v0.2d }, [x9]
; CHECK-DISABLE-NEXT: ld1 { v1.2d }, [x10]
; CHECK-DISABLE-NEXT: add x10, x9, #112
; CHECK-DISABLE-NEXT: ld1 { v2.2d }, [x11]
; CHECK-DISABLE-NEXT: ld1 { v3.2d }, [x10]
; CHECK-DISABLE-NEXT: add x10, x9, #96
; CHECK-DISABLE-NEXT: add x11, x9, #32
; CHECK-DISABLE-NEXT: ld1 { v4.2d }, [x10]
; CHECK-DISABLE-NEXT: add x10, x9, #80
; CHECK-DISABLE-NEXT: add x9, x9, #64
; CHECK-DISABLE-NEXT: ld1 { v5.2d }, [x11]
; CHECK-DISABLE-NEXT: ld1 { v6.2d }, [x10]
; CHECK-DISABLE-NEXT: ld1 { v7.2d }, [x9]
; CHECK-DISABLE-NEXT: uzp1 v0.4s, v0.4s, v1.4s
; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #4
; CHECK-DISABLE-NEXT: add x8, x8, #1
; CHECK-DISABLE-NEXT: uzp1 v3.4s, v4.4s, v3.4s
; CHECK-DISABLE-NEXT: cmp x8, #1000
; CHECK-DISABLE-NEXT: uzp1 v4.4s, v7.4s, v6.4s
; CHECK-DISABLE-NEXT: uzp1 v2.4s, v5.4s, v2.4s
; CHECK-DISABLE-NEXT: uzp1 v1.8h, v4.8h, v3.8h
; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v2.8h
; CHECK-DISABLE-NEXT: uzp1 v0.16b, v0.16b, v1.16b
; CHECK-DISABLE-NEXT: st1 { v0.16b }, [x9]
; CHECK-DISABLE-NEXT: b.eq .LBB3_1
; CHECK-DISABLE-NEXT: // %bb.2: // %exit
; CHECK-DISABLE-NEXT: ret
entry:
  br label %loop

; Per iteration: load the %idx-th <16 x i64> from %A, truncate it to
; <16 x i8>, and store it to the %idx-th <16 x i8> slot of %dst.
loop:
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  %src.ptr = getelementptr inbounds <16 x i64>, ptr %A, i64 %idx
  %wide = load <16 x i64>, ptr %src.ptr
  %narrow = trunc <16 x i64> %wide to <16 x i8>
  %dst.ptr = getelementptr inbounds <16 x i8>, ptr %dst, i64 %idx
  store <16 x i8> %narrow, ptr %dst.ptr
  %idx.next = add i64 %idx, 1
  ; The back-edge is taken when the incremented index equals 1000.
  %at.limit = icmp eq i64 %idx.next, 1000
  br i1 %at.limit, label %loop, label %exit

exit:
  ret void
}

; CHECK-LABEL: lCPI4_0:
; CHECK-NEXT: .byte 0 ; 0x0
; CHECK-NEXT: .byte 8 ; 0x8
; CHECK-NEXT: .byte 16 ; 0x10
; CHECK-NEXT: .byte 24 ; 0x18
; CHECK-NEXT: .byte 32 ; 0x20
; CHECK-NEXT: .byte 40 ; 0x28
; CHECK-NEXT: .byte 48 ; 0x30
; CHECK-NEXT: .byte 56 ; 0x38
; CHECK-NEXT: .byte 255 ; 0xff
; CHECK-NEXT: .byte 255 ; 0xff
; CHECK-NEXT: .byte 255 ; 0xff
; CHECK-NEXT: .byte 255 ; 0xff
; CHECK-NEXT: .byte 255 ; 0xff
; CHECK-NEXT: .byte 255 ; 0xff
; CHECK-NEXT: .byte 255 ; 0xff
; CHECK-NEXT: .byte 255 ; 0xff

; CHECK-BE-LABEL: .LCPI4_0:
; CHECK-BE-NEXT: .byte 7 // 0x7
; CHECK-BE-NEXT: .byte 15 // 0xf
; CHECK-BE-NEXT: .byte 23 // 0x17
; CHECK-BE-NEXT: .byte 31 // 0x1f
; CHECK-BE-NEXT: .byte 39 // 0x27
; CHECK-BE-NEXT: .byte 47 // 0x2f
; CHECK-BE-NEXT: .byte 55 // 0x37
; CHECK-BE-NEXT: .byte 63 // 0x3f
; CHECK-BE-NEXT: .byte 255 // 0xff
; CHECK-BE-NEXT: .byte 255 // 0xff
; CHECK-BE-NEXT: .byte 255 // 0xff
; CHECK-BE-NEXT: .byte 255 // 0xff
; CHECK-BE-NEXT: .byte 255 // 0xff
; CHECK-BE-NEXT: .byte 255 // 0xff
; CHECK-BE-NEXT: .byte 255 // 0xff
; CHECK-BE-NEXT: .byte 255 // 0xff
define void @trunc_v8i64_to_v8i8_in_loop(ptr %A, ptr %dst) {
; CHECK-LABEL: trunc_v8i64_to_v8i8_in_loop:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: Lloh6:
; CHECK-NEXT: adrp x8, lCPI4_0@PAGE
; CHECK-NEXT: Lloh7:
; CHECK-NEXT: ldr q0, [x8, lCPI4_0@PAGEOFF]
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: LBB4_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: add x9, x0, x8, lsl #6
; CHECK-NEXT: ldp q1, q2, [x9]
; CHECK-NEXT: ldp q3, q4, [x9, #32]
; CHECK-NEXT: tbl.16b v1, { v1, v2, v3, v4 }, v0
; CHECK-NEXT: str d1, [x1, x8, lsl #3]
; CHECK-NEXT: add x8, x8, #1
; CHECK-NEXT: cmp x8, #1000
; CHECK-NEXT: b.eq LBB4_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
; CHECK-NEXT: .loh AdrpLdr Lloh6, Lloh7
;
; CHECK-BE-LABEL: trunc_v8i64_to_v8i8_in_loop:
; CHECK-BE: // %bb.0: // %entry
; CHECK-BE-NEXT: adrp x8, .LCPI4_0
; CHECK-BE-NEXT: add x8, x8, :lo12:.LCPI4_0
; CHECK-BE-NEXT: ld1 { v0.16b }, [x8]
; CHECK-BE-NEXT: mov x8, xzr
; CHECK-BE-NEXT: .LBB4_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-BE-NEXT: add x9, x0, x8, lsl #6
; CHECK-BE-NEXT: add x10, x9, #16
; CHECK-BE-NEXT: ld1 { v1.16b }, [x9]
; CHECK-BE-NEXT: add x11, x9, #32
; CHECK-BE-NEXT: ld1 { v2.16b }, [x10]
; CHECK-BE-NEXT: add x9, x9, #48
; CHECK-BE-NEXT: ld1 { v3.16b }, [x11]
; CHECK-BE-NEXT: ld1 { v4.16b }, [x9]
; CHECK-BE-NEXT: add x9, x1, x8, lsl #3
; CHECK-BE-NEXT: add x8, x8, #1
; CHECK-BE-NEXT: cmp x8, #1000
; CHECK-BE-NEXT: tbl v1.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b
; CHECK-BE-NEXT: st1 { v1.8b }, [x9]
; CHECK-BE-NEXT: b.eq .LBB4_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
;
; CHECK-DISABLE-LABEL: trunc_v8i64_to_v8i8_in_loop:
; CHECK-DISABLE: // %bb.0: // %entry
; CHECK-DISABLE-NEXT: mov x8, xzr
; CHECK-DISABLE-NEXT: .LBB4_1: // %loop
; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #6
; CHECK-DISABLE-NEXT: ld1 { v0.2d }, [x9]
; CHECK-DISABLE-NEXT: add x10, x9, #16
; CHECK-DISABLE-NEXT: add x11, x9, #48
; CHECK-DISABLE-NEXT: add x9, x9, #32
; CHECK-DISABLE-NEXT: ld1 { v1.2d }, [x10]
; CHECK-DISABLE-NEXT: ld1 { v2.2d }, [x11]
; CHECK-DISABLE-NEXT: ld1 { v3.2d }, [x9]
; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #3
; CHECK-DISABLE-NEXT: add x8, x8, #1
; CHECK-DISABLE-NEXT: cmp x8, #1000
; CHECK-DISABLE-NEXT: uzp1 v0.4s, v0.4s, v1.4s
; CHECK-DISABLE-NEXT: uzp1 v2.4s, v3.4s, v2.4s
; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v2.8h
; CHECK-DISABLE-NEXT: xtn v0.8b, v0.8h
; CHECK-DISABLE-NEXT: st1 { v0.8b }, [x9]
; CHECK-DISABLE-NEXT: b.eq .LBB4_1
; CHECK-DISABLE-NEXT: // %bb.2: // %exit
; CHECK-DISABLE-NEXT: ret
entry:
  br label %loop

; Per iteration: load the %idx-th <8 x i64> from %A, truncate it to
; <8 x i8>, and store it to the %idx-th <8 x i8> slot of %dst.
loop:
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  %src.ptr = getelementptr inbounds <8 x i64>, ptr %A, i64 %idx
  %wide = load <8 x i64>, ptr %src.ptr
  %narrow = trunc <8 x i64> %wide to <8 x i8>
  %dst.ptr = getelementptr inbounds <8 x i8>, ptr %dst, i64 %idx
  store <8 x i8> %narrow, ptr %dst.ptr
  %idx.next = add i64 %idx, 1
  ; The back-edge is taken when the incremented index equals 1000.
  %at.limit = icmp eq i64 %idx.next, 1000
  br i1 %at.limit, label %loop, label %exit

exit:
  ret void
}

define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) {
; CHECK-LABEL: trunc_v8i19_to_v8i8_in_loop:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: LBB5_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldp x9, x10, [x0]
; CHECK-NEXT: ldrb w14, [x0, #18]
; CHECK-NEXT: ldrh w15, [x0, #16]
; CHECK-NEXT: add x0, x0, #32
; CHECK-NEXT: lsr w12, w10, #12
; CHECK-NEXT: fmov s1, w9
; CHECK-NEXT: lsr x11, x9, #19
; CHECK-NEXT: lsr x13, x10, #31
; CHECK-NEXT: fmov s0, w12
; CHECK-NEXT: lsr x12, x9, #38
; CHECK-NEXT: extr x9, x10, x9, #57
; CHECK-NEXT: mov.s v1[1], w11
; CHECK-NEXT: orr x11, x15, x14, lsl #16
; CHECK-NEXT: mov.s v0[1], w13
; CHECK-NEXT: extr x13, x11, x10, #50
; CHECK-NEXT: lsr w10, w11, #5
; CHECK-NEXT: mov.s v1[2], w12
; CHECK-NEXT: mov.s v0[2], w13
; CHECK-NEXT: mov.s v1[3], w9
; CHECK-NEXT: mov.s v0[3], w10
; CHECK-NEXT: uzp1.8h v0, v1, v0
; CHECK-NEXT: xtn.8b v0, v0
; CHECK-NEXT: str d0, [x1, x8, lsl #3]
; CHECK-NEXT: add x8, x8, #1
; CHECK-NEXT: cmp x8, #1000
; CHECK-NEXT: b.eq LBB5_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
;
; CHECK-BE-LABEL: trunc_v8i19_to_v8i8_in_loop:
; CHECK-BE: // %bb.0: // %entry
; CHECK-BE-NEXT: mov x8, xzr
; CHECK-BE-NEXT: .LBB5_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-BE-NEXT: ldp x10, x9, [x0]
; CHECK-BE-NEXT: ldrh w16, [x0, #16]
; CHECK-BE-NEXT: ldrb w17, [x0, #18]
; CHECK-BE-NEXT: add x0, x0, #32
; CHECK-BE-NEXT: lsl x11, x9, #24
; CHECK-BE-NEXT: lsr x12, x9, #40
; CHECK-BE-NEXT: lsr x13, x10, #45
; CHECK-BE-NEXT: lsl x14, x10, #24
; CHECK-BE-NEXT: lsr x15, x10, #40
; CHECK-BE-NEXT: extr x12, x12, x11, #57
; CHECK-BE-NEXT: fmov s0, w13
; CHECK-BE-NEXT: lsr w13, w10, #7
; CHECK-BE-NEXT: extr x14, x15, x14, #50
; CHECK-BE-NEXT: lsr w15, w9, #14
; CHECK-BE-NEXT: extr x9, x10, x9, #40
; CHECK-BE-NEXT: fmov s1, w12
; CHECK-BE-NEXT: orr w12, w17, w16, lsl #8
; CHECK-BE-NEXT: mov v0.s[1], w14
; CHECK-BE-NEXT: lsr w9, w9, #12
; CHECK-BE-NEXT: orr w11, w12, w11
; CHECK-BE-NEXT: mov v1.s[1], w15
; CHECK-BE-NEXT: lsr w11, w11, #19
; CHECK-BE-NEXT: mov v0.s[2], w13
; CHECK-BE-NEXT: mov v1.s[2], w11
; CHECK-BE-NEXT: mov v0.s[3], w9
; CHECK-BE-NEXT: add x9, x1, x8, lsl #3
; CHECK-BE-NEXT: add x8, x8, #1
; CHECK-BE-NEXT: cmp x8, #1000
; CHECK-BE-NEXT: mov v1.s[3], w12
; CHECK-BE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
; CHECK-BE-NEXT: xtn v0.8b, v0.8h
; CHECK-BE-NEXT: st1 { v0.8b }, [x9]
; CHECK-BE-NEXT: b.eq .LBB5_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
;
; CHECK-DISABLE-LABEL: trunc_v8i19_to_v8i8_in_loop:
; CHECK-DISABLE: // %bb.0: // %entry
; CHECK-DISABLE-NEXT: mov x8, xzr
; CHECK-DISABLE-NEXT: .LBB5_1: // %loop
; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-DISABLE-NEXT: ldp x10, x9, [x0]
; CHECK-DISABLE-NEXT: ldrh w16, [x0, #16]
; CHECK-DISABLE-NEXT: ldrb w17, [x0, #18]
; CHECK-DISABLE-NEXT: add x0, x0, #32
; CHECK-DISABLE-NEXT: lsl x11, x9, #24
; CHECK-DISABLE-NEXT: lsr x12, x9, #40
; CHECK-DISABLE-NEXT: lsr x13, x10, #45
; CHECK-DISABLE-NEXT: lsl x14, x10, #24
; CHECK-DISABLE-NEXT: lsr x15, x10, #40
; CHECK-DISABLE-NEXT: extr x12, x12, x11, #57
; CHECK-DISABLE-NEXT: fmov s0, w13
; CHECK-DISABLE-NEXT: lsr w13, w10, #7
; CHECK-DISABLE-NEXT: extr x14, x15, x14, #50
; CHECK-DISABLE-NEXT: lsr w15, w9, #14
; CHECK-DISABLE-NEXT: extr x9, x10, x9, #40
; CHECK-DISABLE-NEXT: fmov s1, w12
; CHECK-DISABLE-NEXT: orr w12, w17, w16, lsl #8
; CHECK-DISABLE-NEXT: mov v0.s[1], w14
; CHECK-DISABLE-NEXT: lsr w9, w9, #12
; CHECK-DISABLE-NEXT: orr w11, w12, w11
; CHECK-DISABLE-NEXT: mov v1.s[1], w15
; CHECK-DISABLE-NEXT: lsr w11, w11, #19
; CHECK-DISABLE-NEXT: mov v0.s[2], w13
; CHECK-DISABLE-NEXT: mov v1.s[2], w11
; CHECK-DISABLE-NEXT: mov v0.s[3], w9
; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #3
; CHECK-DISABLE-NEXT: add x8, x8, #1
; CHECK-DISABLE-NEXT: cmp x8, #1000
; CHECK-DISABLE-NEXT: mov v1.s[3], w12
; CHECK-DISABLE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
; CHECK-DISABLE-NEXT: xtn v0.8b, v0.8h
; CHECK-DISABLE-NEXT: st1 { v0.8b }, [x9]
; CHECK-DISABLE-NEXT: b.eq .LBB5_1
; CHECK-DISABLE-NEXT: // %bb.2: // %exit
; CHECK-DISABLE-NEXT: ret
entry:
  br label %loop

; Non-power-of-two source element width (i19): the expected output for every
; configuration contains no tbl instruction.
loop:
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  %src.ptr = getelementptr inbounds <8 x i19>, ptr %A, i64 %idx
  %wide = load <8 x i19>, ptr %src.ptr
  %narrow = trunc <8 x i19> %wide to <8 x i8>
  %dst.ptr = getelementptr inbounds <8 x i8>, ptr %dst, i64 %idx
  store <8 x i8> %narrow, ptr %dst.ptr
  %idx.next = add i64 %idx, 1
  ; The back-edge is taken when the incremented index equals 1000.
  %at.limit = icmp eq i64 %idx.next, 1000
  br i1 %at.limit, label %loop, label %exit

exit:
  ret void
}

define void @trunc_v11i64_to_v11i8_in_loop(ptr %A, ptr %dst) {
; CHECK-LABEL: trunc_v11i64_to_v11i8_in_loop:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: mov w8, #1000 ; =0x3e8
; CHECK-NEXT: LBB6_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldp q4, q0, [x0, #48]
; CHECK-NEXT: add x9, x1, #10
; CHECK-NEXT: ldr d1, [x0, #80]
; CHECK-NEXT: ldp q3, q2, [x0]
; CHECK-NEXT: ldr q5, [x0, #32]
; CHECK-NEXT: subs x8, x8, #1
; CHECK-NEXT: add x0, x0, #128
; CHECK-NEXT: uzp1.4s v0, v0, v1
; CHECK-NEXT: uzp1.4s v1, v5, v4
; CHECK-NEXT: uzp1.4s v2, v3, v2
; CHECK-NEXT: xtn.4h v0, v0
; CHECK-NEXT: uzp1.8h v1, v2, v1
; CHECK-NEXT: uzp1.8b v2, v0, v0
; CHECK-NEXT: uzp1.16b v0, v1, v0
; CHECK-NEXT: st1.b { v2 }[2], [x9]
; CHECK-NEXT: add x9, x1, #8
; CHECK-NEXT: st1.h { v0 }[4], [x9]
; CHECK-NEXT: str d0, [x1], #16
; CHECK-NEXT: b.eq LBB6_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
;
; CHECK-BE-LABEL: trunc_v11i64_to_v11i8_in_loop:
; CHECK-BE: // %bb.0: // %entry
; CHECK-BE-NEXT: mov w8, #1000 // =0x3e8
; CHECK-BE-NEXT: .LBB6_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-BE-NEXT: add x9, x0, #64
; CHECK-BE-NEXT: add x10, x0, #16
; CHECK-BE-NEXT: ld1 { v3.2d }, [x0]
; CHECK-BE-NEXT: ld1 { v0.2d }, [x9]
; CHECK-BE-NEXT: add x9, x0, #48
; CHECK-BE-NEXT: ld1 { v1.2d }, [x10]
; CHECK-BE-NEXT: add x10, x0, #32
; CHECK-BE-NEXT: ld1 { v2.2d }, [x9]
; CHECK-BE-NEXT: ldr d5, [x0, #80]
; CHECK-BE-NEXT: ld1 { v4.2d }, [x10]
; CHECK-BE-NEXT: add x9, x1, #10
; CHECK-BE-NEXT: subs x8, x8, #1
; CHECK-BE-NEXT: uzp1 v1.4s, v3.4s, v1.4s
; CHECK-BE-NEXT: uzp1 v0.4s, v0.4s, v5.4s
; CHECK-BE-NEXT: add x0, x0, #128
; CHECK-BE-NEXT: uzp1 v2.4s, v4.4s, v2.4s
; CHECK-BE-NEXT: xtn v0.4h, v0.4s
; CHECK-BE-NEXT: uzp1 v1.8h, v1.8h, v2.8h
; CHECK-BE-NEXT: uzp1 v1.16b, v1.16b, v0.16b
; CHECK-BE-NEXT: uzp1 v0.8b, v0.8b, v0.8b
; CHECK-BE-NEXT: rev16 v2.16b, v1.16b
; CHECK-BE-NEXT: rev64 v1.16b, v1.16b
; CHECK-BE-NEXT: st1 { v0.b }[2], [x9]
; CHECK-BE-NEXT: add x9, x1, #8
; CHECK-BE-NEXT: st1 { v2.h }[4], [x9]
; CHECK-BE-NEXT: str d1, [x1], #16
; CHECK-BE-NEXT: b.eq .LBB6_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
;
; CHECK-DISABLE-LABEL: trunc_v11i64_to_v11i8_in_loop:
; CHECK-DISABLE: // %bb.0: // %entry
; CHECK-DISABLE-NEXT: mov w8, #1000 // =0x3e8
; CHECK-DISABLE-NEXT: .LBB6_1: // %loop
; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-DISABLE-NEXT: add x9, x0, #64
; CHECK-DISABLE-NEXT: add x10, x0, #16
; CHECK-DISABLE-NEXT: ld1 { v3.2d }, [x0]
; CHECK-DISABLE-NEXT: ld1 { v0.2d }, [x9]
; CHECK-DISABLE-NEXT: add x9, x0, #48
; CHECK-DISABLE-NEXT: ld1 { v1.2d }, [x10]
; CHECK-DISABLE-NEXT: add x10, x0, #32
; CHECK-DISABLE-NEXT: ld1 { v2.2d }, [x9]
; CHECK-DISABLE-NEXT: ldr d5, [x0, #80]
; CHECK-DISABLE-NEXT: ld1 { v4.2d }, [x10]
; CHECK-DISABLE-NEXT: add x9, x1, #10
; CHECK-DISABLE-NEXT: subs x8, x8, #1
; CHECK-DISABLE-NEXT: uzp1 v1.4s, v3.4s, v1.4s
; CHECK-DISABLE-NEXT: uzp1 v0.4s, v0.4s, v5.4s
; CHECK-DISABLE-NEXT: add x0, x0, #128
; CHECK-DISABLE-NEXT: uzp1 v2.4s, v4.4s, v2.4s
; CHECK-DISABLE-NEXT: xtn v0.4h, v0.4s
; CHECK-DISABLE-NEXT: uzp1 v1.8h, v1.8h, v2.8h
; CHECK-DISABLE-NEXT: uzp1 v1.16b, v1.16b, v0.16b
; CHECK-DISABLE-NEXT: uzp1 v0.8b, v0.8b, v0.8b
; CHECK-DISABLE-NEXT: rev16 v2.16b, v1.16b
; CHECK-DISABLE-NEXT: rev64 v1.16b, v1.16b
; CHECK-DISABLE-NEXT: st1 { v0.b }[2], [x9]
; CHECK-DISABLE-NEXT: add x9, x1, #8
; CHECK-DISABLE-NEXT: st1 { v2.h }[4], [x9]
; CHECK-DISABLE-NEXT: str d1, [x1], #16
; CHECK-DISABLE-NEXT: b.eq .LBB6_1
; CHECK-DISABLE-NEXT: // %bb.2: // %exit
; CHECK-DISABLE-NEXT: ret
entry:
  br label %loop

; Odd element count (11 lanes): the expected output for every configuration
; contains no tbl instruction.
loop:
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  %src.ptr = getelementptr inbounds <11 x i64>, ptr %A, i64 %idx
  %wide = load <11 x i64>, ptr %src.ptr
  %narrow = trunc <11 x i64> %wide to <11 x i8>
  %dst.ptr = getelementptr inbounds <11 x i8>, ptr %dst, i64 %idx
  store <11 x i8> %narrow, ptr %dst.ptr
  %idx.next = add i64 %idx, 1
  ; The back-edge is taken when the incremented index equals 1000.
  %at.limit = icmp eq i64 %idx.next, 1000
  br i1 %at.limit, label %loop, label %exit

exit:
  ret void
}

define void @trunc_v16i16_to_v16i8_in_loop(ptr %A, ptr %dst) {
; CHECK-LABEL: trunc_v16i16_to_v16i8_in_loop:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: LBB7_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: add x9, x0, x8, lsl #5
; CHECK-NEXT: ldp q1, q0, [x9]
; CHECK-NEXT: uzp1.16b v0, v1, v0
; CHECK-NEXT: str q0, [x1, x8, lsl #4]
; CHECK-NEXT: add x8, x8, #1
; CHECK-NEXT: cmp x8, #1000
; CHECK-NEXT: b.eq LBB7_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
;
; CHECK-BE-LABEL: trunc_v16i16_to_v16i8_in_loop:
; CHECK-BE: // %bb.0: // %entry
; CHECK-BE-NEXT: mov x8, xzr
; CHECK-BE-NEXT: .LBB7_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-BE-NEXT: add x9, x0, x8, lsl #5
; CHECK-BE-NEXT: add x10, x9, #16
; CHECK-BE-NEXT: ld1 { v0.8h }, [x9]
; CHECK-BE-NEXT: add x9, x1, x8, lsl #4
; CHECK-BE-NEXT: ld1 { v1.8h }, [x10]
; CHECK-BE-NEXT: add x8, x8, #1
; CHECK-BE-NEXT: cmp x8, #1000
; CHECK-BE-NEXT: uzp1 v0.16b, v0.16b, v1.16b
; CHECK-BE-NEXT: st1 { v0.16b }, [x9]
; CHECK-BE-NEXT: b.eq .LBB7_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
;
; CHECK-DISABLE-LABEL: trunc_v16i16_to_v16i8_in_loop:
; CHECK-DISABLE: // %bb.0: // %entry
; CHECK-DISABLE-NEXT: mov x8, xzr
; CHECK-DISABLE-NEXT: .LBB7_1: // %loop
; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #5
; CHECK-DISABLE-NEXT: add x10, x9, #16
; CHECK-DISABLE-NEXT: ld1 { v0.8h }, [x9]
; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #4
; CHECK-DISABLE-NEXT: ld1 { v1.8h }, [x10]
; CHECK-DISABLE-NEXT: add x8, x8, #1
; CHECK-DISABLE-NEXT: cmp x8, #1000
; CHECK-DISABLE-NEXT: uzp1 v0.16b, v0.16b, v1.16b
; CHECK-DISABLE-NEXT: st1 { v0.16b }, [x9]
; CHECK-DISABLE-NEXT: b.eq .LBB7_1
; CHECK-DISABLE-NEXT: // %bb.2: // %exit
; CHECK-DISABLE-NEXT: ret
entry:
  br label %loop

; i16 -> i8 over 16 lanes: the expected output for every configuration is a
; single uzp1 per iteration, with no tbl instruction.
loop:
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  %src.ptr = getelementptr inbounds <16 x i16>, ptr %A, i64 %idx
  %wide = load <16 x i16>, ptr %src.ptr
  %narrow = trunc <16 x i16> %wide to <16 x i8>
  %dst.ptr = getelementptr inbounds <16 x i8>, ptr %dst, i64 %idx
  store <16 x i8> %narrow, ptr %dst.ptr
  %idx.next = add i64 %idx, 1
  ; The back-edge is taken when the incremented index equals 1000.
  %at.limit = icmp eq i64 %idx.next, 1000
  br i1 %at.limit, label %loop, label %exit

exit:
  ret void
}

define void @trunc_v8i16_to_v8i8_in_loop(ptr %A, ptr %dst) {
; CHECK-LABEL: trunc_v8i16_to_v8i8_in_loop:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: LBB8_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr q0, [x0, x8, lsl #4]
; CHECK-NEXT: xtn.8b v0, v0
; CHECK-NEXT: str d0, [x1, x8, lsl #3]
; CHECK-NEXT: add x8, x8, #1
; CHECK-NEXT: cmp x8, #1000
; CHECK-NEXT: b.eq LBB8_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
;
; CHECK-BE-LABEL: trunc_v8i16_to_v8i8_in_loop:
; CHECK-BE: // %bb.0: // %entry
; CHECK-BE-NEXT: mov x8, xzr
; CHECK-BE-NEXT: .LBB8_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-BE-NEXT: add x9, x0, x8, lsl #4
; CHECK-BE-NEXT: ld1 { v0.8h }, [x9]
; CHECK-BE-NEXT: add x9, x1, x8, lsl #3
; CHECK-BE-NEXT: add x8, x8, #1
; CHECK-BE-NEXT: cmp x8, #1000
; CHECK-BE-NEXT: xtn v0.8b, v0.8h
; CHECK-BE-NEXT: st1 { v0.8b }, [x9]
; CHECK-BE-NEXT: b.eq .LBB8_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
;
; CHECK-DISABLE-LABEL: trunc_v8i16_to_v8i8_in_loop:
; CHECK-DISABLE: // %bb.0: // %entry
; CHECK-DISABLE-NEXT: mov x8, xzr
; CHECK-DISABLE-NEXT: .LBB8_1: // %loop
; CHECK-DISABLE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-DISABLE-NEXT: add x9, x0, x8, lsl #4
; CHECK-DISABLE-NEXT: ld1 { v0.8h }, [x9]
; CHECK-DISABLE-NEXT: add x9, x1, x8, lsl #3
; CHECK-DISABLE-NEXT: add x8, x8, #1
; CHECK-DISABLE-NEXT: cmp x8, #1000
; CHECK-DISABLE-NEXT: xtn v0.8b, v0.8h
; CHECK-DISABLE-NEXT: st1 { v0.8b }, [x9]
; CHECK-DISABLE-NEXT: b.eq .LBB8_1
; CHECK-DISABLE-NEXT: // %bb.2: // %exit
; CHECK-DISABLE-NEXT: ret
entry:
  br label %loop

; i16 -> i8 over 8 lanes: the expected output for every configuration is a
; single xtn per iteration, with no tbl instruction.
loop:
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  %src.ptr = getelementptr inbounds <8 x i16>, ptr %A, i64 %idx
  %wide = load <8 x i16>, ptr %src.ptr
  %narrow = trunc <8 x i16> %wide to <8 x i8>
  %dst.ptr = getelementptr inbounds <8 x i8>, ptr %dst, i64 %idx
  store <8 x i8> %narrow, ptr %dst.ptr
  %idx.next = add i64 %idx, 1
  ; The back-edge is taken when the incremented index equals 1000.
  %at.limit = icmp eq i64 %idx.next, 1000
  br i1 %at.limit, label %loop, label %exit

exit:
  ret void
}