; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_2048

target triple = "aarch64-unknown-linux-gnu"

; A 4 x i16 -> 4 x i32 extend fits in a NEON register, so plain ushll is used
; rather than SVE.
define <4 x i32> @load_zext_v4i16i32(ptr %ap) vscale_range(2,0) #0 {
; CHECK-LABEL: load_zext_v4i16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %ap
  %val = zext <4 x i16> %a to <4 x i32>
  ret <4 x i32> %val
}

; Don't try to use SVE for irregular types.
define <2 x i256> @load_zext_v2i64i256(ptr %ap) #0 {
; CHECK-LABEL: load_zext_v2i64i256:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    mov x1, xzr
; CHECK-NEXT:    mov x2, xzr
; CHECK-NEXT:    mov x3, xzr
; CHECK-NEXT:    mov x5, xzr
; CHECK-NEXT:    mov x6, xzr
; CHECK-NEXT:    mov x4, v0.d[1]
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    mov x7, xzr
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %ap
  %val = zext <2 x i64> %a to <2 x i256>
  ret <2 x i256> %val
}

define void @load_zext_v8i16i32(ptr %ap, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: load_zext_v8i16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %ap
  %val = zext <8 x i16> %a to <8 x i32>
  store <8 x i32> %val, ptr %b
  ret void
}

define void @load_zext_v16i16i32(ptr %ap, ptr %b) vscale_range(4,0) #0 {
; CHECK-LABEL: load_zext_v16i16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl16
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %ap
  %val = zext <16 x i16> %a to <16 x i32>
  store <16 x i32> %val, ptr %b
  ret void
}

define void @load_zext_v32i16i32(ptr %ap, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: load_zext_v32i16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %a = load <32 x i16>, ptr %ap
  %val = zext <32 x i16> %a to <32 x i32>
  store <32 x i32> %val, ptr %b
  ret void
}

; At minimum VL 1024 the operation is split in two halves; at VL 2048 it fits
; in a single pair of gather-free contiguous extending load/store.
define void @load_zext_v64i16i32(ptr %ap, ptr %b) #0 {
; VBITS_GE_1024-LABEL: load_zext_v64i16i32:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.s, vl32
; VBITS_GE_1024-NEXT:    mov x8, #32 // =0x20
; VBITS_GE_1024-NEXT:    ld1h { z0.s }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_1024-NEXT:    ld1h { z1.s }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_1024-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_zext_v64i16i32:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.s, vl64
; VBITS_GE_2048-NEXT:    ld1h { z0.s }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %a = load <64 x i16>, ptr %ap
  %val = zext <64 x i16> %a to <64 x i32>
  store <64 x i32> %val, ptr %b
  ret void
}

define <4 x i32> @load_sext_v4i16i32(ptr %ap) vscale_range(2,0) #0 {
; CHECK-LABEL: load_sext_v4i16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %ap
  %val = sext <4 x i16> %a to <4 x i32>
  ret <4 x i32> %val
}

define void @load_sext_v8i16i32(ptr %ap, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: load_sext_v8i16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0]
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %ap
  %val = sext <8 x i16> %a to <8 x i32>
  store <8 x i32> %val, ptr %b
  ret void
}

define void @load_sext_v16i16i32(ptr %ap, ptr %b) vscale_range(4,0) #0 {
; CHECK-LABEL: load_sext_v16i16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl16
; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0]
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %ap
  %val = sext <16 x i16> %a to <16 x i32>
  store <16 x i32> %val, ptr %b
  ret void
}

define void @load_sext_v32i16i32(ptr %ap, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: load_sext_v32i16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0]
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %a = load <32 x i16>, ptr %ap
  %val = sext <32 x i16> %a to <32 x i32>
  store <32 x i32> %val, ptr %b
  ret void
}

define void @load_sext_v64i16i32(ptr %ap, ptr %b) #0 {
; VBITS_GE_1024-LABEL: load_sext_v64i16i32:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.s, vl32
; VBITS_GE_1024-NEXT:    mov x8, #32 // =0x20
; VBITS_GE_1024-NEXT:    ld1sh { z0.s }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_1024-NEXT:    ld1sh { z1.s }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_1024-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_sext_v64i16i32:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.s, vl64
; VBITS_GE_2048-NEXT:    ld1sh { z0.s }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %a = load <64 x i16>, ptr %ap
  %val = sext <64 x i16> %a to <64 x i32>
  store <64 x i32> %val, ptr %b
  ret void
}

define void @load_zext_v32i8i64(ptr %ap, ptr %b) #0 {
; VBITS_GE_1024-LABEL: load_zext_v32i8i64:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.d, vl16
; VBITS_GE_1024-NEXT:    mov w8, #16 // =0x10
; VBITS_GE_1024-NEXT:    ld1b { z0.d }, p0/z, [x0, x8]
; VBITS_GE_1024-NEXT:    ld1b { z1.d }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_1024-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_1024-NEXT:    st1d { z1.d }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_zext_v32i8i64:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.d, vl32
; VBITS_GE_2048-NEXT:    ld1b { z0.d }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %a = load <32 x i8>, ptr %ap
  %val = zext <32 x i8> %a to <32 x i64>
  store <32 x i64> %val, ptr %b
  ret void
}

define void @load_sext_v32i8i64(ptr %ap, ptr %b) #0 {
; VBITS_GE_1024-LABEL: load_sext_v32i8i64:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.d, vl16
; VBITS_GE_1024-NEXT:    mov w8, #16 // =0x10
; VBITS_GE_1024-NEXT:    ld1sb { z0.d }, p0/z, [x0, x8]
; VBITS_GE_1024-NEXT:    ld1sb { z1.d }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_1024-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_1024-NEXT:    st1d { z1.d }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_sext_v32i8i64:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.d, vl32
; VBITS_GE_2048-NEXT:    ld1sb { z0.d }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %a = load <32 x i8>, ptr %ap
  %val = sext <32 x i8> %a to <32 x i64>
  store <32 x i64> %val, ptr %b
  ret void
}

define void @load_zext_v32i16i64(ptr %ap, ptr %b) #0 {
; VBITS_GE_1024-LABEL: load_zext_v32i16i64:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.d, vl16
; VBITS_GE_1024-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_1024-NEXT:    ld1h { z0.d }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_1024-NEXT:    ld1h { z1.d }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_1024-NEXT:    st1d { z1.d }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_zext_v32i16i64:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.d, vl32
; VBITS_GE_2048-NEXT:    ld1h { z0.d }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %a = load <32 x i16>, ptr %ap
  %val = zext <32 x i16> %a to <32 x i64>
  store <32 x i64> %val, ptr %b
  ret void
}

define void @load_sext_v32i16i64(ptr %ap, ptr %b) #0 {
; VBITS_GE_1024-LABEL: load_sext_v32i16i64:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.d, vl16
; VBITS_GE_1024-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_1024-NEXT:    ld1sh { z0.d }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_1024-NEXT:    ld1sh { z1.d }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_1024-NEXT:    st1d { z1.d }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_sext_v32i16i64:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.d, vl32
; VBITS_GE_2048-NEXT:    ld1sh { z0.d }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %a = load <32 x i16>, ptr %ap
  %val = sext <32 x i16> %a to <32 x i64>
  store <32 x i64> %val, ptr %b
  ret void
}

define void @load_zext_v32i32i64(ptr %ap, ptr %b) #0 {
; VBITS_GE_1024-LABEL: load_zext_v32i32i64:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.d, vl16
; VBITS_GE_1024-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_1024-NEXT:    ld1w { z0.d }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_1024-NEXT:    ld1w { z1.d }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_1024-NEXT:    st1d { z1.d }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_zext_v32i32i64:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.d, vl32
; VBITS_GE_2048-NEXT:    ld1w { z0.d }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %a = load <32 x i32>, ptr %ap
  %val = zext <32 x i32> %a to <32 x i64>
  store <32 x i64> %val, ptr %b
  ret void
}

define void @load_sext_v32i32i64(ptr %ap, ptr %b) #0 {
; VBITS_GE_1024-LABEL: load_sext_v32i32i64:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.d, vl16
; VBITS_GE_1024-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_1024-NEXT:    ld1sw { z0.d }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_1024-NEXT:    ld1sw { z1.d }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_1024-NEXT:    st1d { z1.d }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_sext_v32i32i64:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.d, vl32
; VBITS_GE_2048-NEXT:    ld1sw { z0.d }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %a = load <32 x i32>, ptr %ap
  %val = sext <32 x i32> %a to <32 x i64>
  store <32 x i64> %val, ptr %b
  ret void
}

attributes #0 = { "target-features"="+sve" }