; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512

; Every function below loads a fixed-length integer vector, truncates each
; element to a narrower integer type and stores the result. The expected
; output records where the trunc + store pair is emitted as one narrowing SVE
; store (st1b/st1h/st1w applied to a wider element size) and where it is not.
;
; Functions that carry their own vscale_range attribute expect the same output
; from all three invocations above. The remaining functions have one expected
; sequence for the 256-bit invocation and another shared by the 512-bit and
; 2048-bit invocations.

target triple = "aarch64-unknown-linux-gnu"

; <2 x i64> -> <2 x i8>. The 128-bit input is read with a plain `ldr q0`; the
; truncation is folded into `st1b { z0.d }` under a vl2 predicate.
define void @store_trunc_v2i64i8(ptr %ap, ptr %dest) vscale_range(2,0) #0 {
; CHECK-LABEL: store_trunc_v2i64i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    st1b { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %ap
  %val = trunc <2 x i64> %a to <2 x i8>
  store <2 x i8> %val, ptr %dest
  ret void
}

; <4 x i64> -> <4 x i8>. One predicated ld1d followed by one narrowing st1b,
; both under the same vl4 predicate.
define void @store_trunc_v4i64i8(ptr %ap, ptr %dest) vscale_range(2,0) #0 {
; CHECK-LABEL: store_trunc_v4i64i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    st1b { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %a = load <4 x i64>, ptr %ap
  %val = trunc <4 x i64> %a to <4 x i8>
  store <4 x i8> %val, ptr %dest
  ret void
}

; <8 x i64> -> <8 x i8>, no vscale_range on the function.
; 256-bit invocation: the input is loaded as two 4-element halves, each half is
; narrowed to .s elements with uzp1, the halves are joined with splice, and the
; result is stored with `st1b { z1.s }` under a vl8 predicate.
; 512-bit and 2048-bit invocations: a single ld1d and `st1b { z0.d }`.
define void @store_trunc_v8i64i8(ptr %ap, ptr %dest) #0 {
; VBITS_GE_256-LABEL: store_trunc_v8i64i8:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ptrue p0.s, vl4
; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT:    splice z1.s, p0, z1.s, z0.s
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    st1b { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: store_trunc_v8i64i8:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    st1b { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %a = load <8 x i64>, ptr %ap
  %val = trunc <8 x i64> %a to <8 x i8>
  store <8 x i8> %val, ptr %dest
  ret void
}

; <16 x i64> -> <16 x i8> with vscale_range(8,0): a single ld1d and narrowing
; st1b under a vl16 predicate, identical for every invocation.
define void @store_trunc_v16i64i8(ptr %ap, ptr %dest) vscale_range(8,0) #0 {
; CHECK-LABEL: store_trunc_v16i64i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    st1b { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %a = load <16 x i64>, ptr %ap
  %val = trunc <16 x i64> %a to <16 x i8>
  store <16 x i8> %val, ptr %dest
  ret void
}

; <32 x i64> -> <32 x i8> with vscale_range(16,0): a single ld1d and narrowing
; st1b under a vl32 predicate, identical for every invocation.
define void @store_trunc_v32i64i8(ptr %ap, ptr %dest) vscale_range(16,0) #0 {
; CHECK-LABEL: store_trunc_v32i64i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    st1b { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %a = load <32 x i64>, ptr %ap
  %val = trunc <32 x i64> %a to <32 x i8>
  store <32 x i8> %val, ptr %dest
  ret void
}

; <8 x i64> -> <8 x i16>, no vscale_range on the function.
; 256-bit invocation: each 4-element half goes through two uzp1 steps (.s, then
; .h), the halves are combined with `mov v1.d[1], v0.d[0]`, and the result is
; written with a plain `str q1` rather than a narrowing SVE store.
; 512-bit and 2048-bit invocations: a single ld1d and `st1h { z0.d }`.
define void @store_trunc_v8i64i16(ptr %ap, ptr %dest) #0 {
; Currently does not use the truncating store
; VBITS_GE_256-LABEL: store_trunc_v8i64i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT:    uzp1 z1.h, z1.h, z1.h
; VBITS_GE_256-NEXT:    mov v1.d[1], v0.d[0]
; VBITS_GE_256-NEXT:    str q1, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: store_trunc_v8i64i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    st1h { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %a = load <8 x i64>, ptr %ap
  %val = trunc <8 x i64> %a to <8 x i16>
  store <8 x i16> %val, ptr %dest
  ret void
}

; <8 x i64> -> <8 x i32>, no vscale_range on the function.
; 256-bit invocation: each half is narrowed to .s with uzp1, the halves are
; joined with splice, and the result is stored with `st1w { z1.s }` under a vl8
; predicate.
; 512-bit and 2048-bit invocations: a single ld1d and `st1w { z0.d }`.
define void @store_trunc_v8i64i32(ptr %ap, ptr %dest) #0 {
; VBITS_GE_256-LABEL: store_trunc_v8i64i32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ptrue p0.s, vl4
; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT:    splice z1.s, p0, z1.s, z0.s
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: store_trunc_v8i64i32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    st1w { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %a = load <8 x i64>, ptr %ap
  %val = trunc <8 x i64> %a to <8 x i32>
  store <8 x i32> %val, ptr %dest
  ret void
}

; <16 x i32> -> <16 x i8>, no vscale_range on the function.
; 256-bit invocation: each 8-element half goes through two uzp1 steps (.h, then
; .b), the halves are combined with `mov v1.d[1], v0.d[0]`, and the result is
; written with a plain `str q1` rather than a narrowing SVE store.
; 512-bit and 2048-bit invocations: a single ld1w and `st1b { z0.s }`.
define void @store_trunc_v16i32i8(ptr %ap, ptr %dest) #0 {
; Currently does not use the truncating store
; VBITS_GE_256-LABEL: store_trunc_v16i32i8:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT:    uzp1 z1.h, z1.h, z1.h
; VBITS_GE_256-NEXT:    uzp1 z0.b, z0.b, z0.b
; VBITS_GE_256-NEXT:    uzp1 z1.b, z1.b, z1.b
; VBITS_GE_256-NEXT:    mov v1.d[1], v0.d[0]
; VBITS_GE_256-NEXT:    str q1, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: store_trunc_v16i32i8:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    st1b { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %a = load <16 x i32>, ptr %ap
  %val = trunc <16 x i32> %a to <16 x i8>
  store <16 x i8> %val, ptr %dest
  ret void
}

; <16 x i32> -> <16 x i16>, no vscale_range on the function.
; 256-bit invocation: each half is narrowed to .h with uzp1, the halves are
; joined with splice, and the result is stored with `st1h { z1.h }` under a
; vl16 predicate.
; 512-bit and 2048-bit invocations: a single ld1w and `st1h { z0.s }`.
define void @store_trunc_v16i32i16(ptr %ap, ptr %dest) #0 {
; VBITS_GE_256-LABEL: store_trunc_v16i32i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ptrue p0.h, vl8
; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT:    uzp1 z1.h, z1.h, z1.h
; VBITS_GE_256-NEXT:    splice z1.h, p0, z1.h, z0.h
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: store_trunc_v16i32i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    st1h { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %a = load <16 x i32>, ptr %ap
  %val = trunc <16 x i32> %a to <16 x i16>
  store <16 x i16> %val, ptr %dest
  ret void
}

; <32 x i16> -> <32 x i8>, no vscale_range on the function.
; 256-bit invocation: each 16-element half is narrowed to .b with uzp1, the
; halves are joined with splice, and the result is stored with
; `st1b { z1.b }` under a vl32 predicate.
; 512-bit and 2048-bit invocations: a single ld1h and `st1b { z0.h }`.
define void @store_trunc_v32i16i8(ptr %ap, ptr %dest) #0 {
; VBITS_GE_256-LABEL: store_trunc_v32i16i8:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ptrue p0.b, vl16
; VBITS_GE_256-NEXT:    uzp1 z0.b, z0.b, z0.b
; VBITS_GE_256-NEXT:    uzp1 z1.b, z1.b, z1.b
; VBITS_GE_256-NEXT:    splice z1.b, p0, z1.b, z0.b
; VBITS_GE_256-NEXT:    ptrue p0.b, vl32
; VBITS_GE_256-NEXT:    st1b { z1.b }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: store_trunc_v32i16i8:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    st1b { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %a = load <32 x i16>, ptr %ap
  %val = trunc <32 x i16> %a to <32 x i8>
  store <32 x i8> %val, ptr %dest
  ret void
}

; Attribute group shared by every function above: enables the SVE target feature.
attributes #0 = { "target-features"="+sve" }