; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE

; Fixed-length vector loads.
;
; Every function below loads one fixed-length vector from %a and returns it.
; The first two llc invocations (+sve and +sme) share the default FileCheck
; prefix; the third invocation enables neither attribute and is verified with
; the NONEON-NOSVE prefix instead.
;
; The assertion lines are generated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.

target triple = "aarch64-unknown-linux-gnu"

; ---- 8-bit elements ----

define <4 x i8> @load_v4i8(ptr %a) {
; CHECK-LABEL: load_v4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl4
; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: load_v4i8:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #16
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    ldrb w8, [x0, #3]
; NONEON-NOSVE-NEXT:    strh w8, [sp, #14]
; NONEON-NOSVE-NEXT:    ldrb w8, [x0, #2]
; NONEON-NOSVE-NEXT:    strh w8, [sp, #12]
; NONEON-NOSVE-NEXT:    ldrb w8, [x0, #1]
; NONEON-NOSVE-NEXT:    strh w8, [sp, #10]
; NONEON-NOSVE-NEXT:    ldrb w8, [x0]
; NONEON-NOSVE-NEXT:    strh w8, [sp, #8]
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  %load = load <4 x i8>, ptr %a
  ret <4 x i8> %load
}

define <8 x i8> @load_v8i8(ptr %a) {
; CHECK-LABEL: load_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: load_v8i8:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldr d0, [x0]
; NONEON-NOSVE-NEXT:    ret
  %load = load <8 x i8>, ptr %a
  ret <8 x i8> %load
}

define <16 x i8> @load_v16i8(ptr %a) {
; CHECK-LABEL: load_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: load_v16i8:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldr q0, [x0]
; NONEON-NOSVE-NEXT:    ret
  %load = load <16 x i8>, ptr %a
  ret <16 x i8> %load
}

define <32 x i8> @load_v32i8(ptr %a) {
; CHECK-LABEL: load_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q0, q1, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: load_v32i8:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldp q0, q1, [x0]
; NONEON-NOSVE-NEXT:    ret
  %load = load <32 x i8>, ptr %a
  ret <32 x i8> %load
}

; ---- 16-bit elements (integer and half) ----

define <2 x i16> @load_v2i16(ptr %a) {
; CHECK-LABEL: load_v2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl2
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: load_v2i16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #16
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    ldrh w8, [x0, #2]
; NONEON-NOSVE-NEXT:    str w8, [sp, #12]
; NONEON-NOSVE-NEXT:    ldrh w8, [x0]
; NONEON-NOSVE-NEXT:    str w8, [sp, #8]
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  %load = load <2 x i16>, ptr %a
  ret <2 x i16> %load
}

define <2 x half> @load_v2f16(ptr %a) {
; CHECK-LABEL: load_v2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: load_v2f16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #16
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    ldr w8, [x0]
; NONEON-NOSVE-NEXT:    str w8, [sp, #8]
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  %load = load <2 x half>, ptr %a
  ret <2 x half> %load
}

define <4 x i16> @load_v4i16(ptr %a) {
; CHECK-LABEL: load_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: load_v4i16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldr d0, [x0]
; NONEON-NOSVE-NEXT:    ret
  %load = load <4 x i16>, ptr %a
  ret <4 x i16> %load
}

define <4 x half> @load_v4f16(ptr %a) {
; CHECK-LABEL: load_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: load_v4f16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldr d0, [x0]
; NONEON-NOSVE-NEXT:    ret
  %load = load <4 x half>, ptr %a
  ret <4 x half> %load
}

define <8 x i16> @load_v8i16(ptr %a) {
; CHECK-LABEL: load_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: load_v8i16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldr q0, [x0]
; NONEON-NOSVE-NEXT:    ret
  %load = load <8 x i16>, ptr %a
  ret <8 x i16> %load
}

define <8 x half> @load_v8f16(ptr %a) {
; CHECK-LABEL: load_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: load_v8f16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldr q0, [x0]
; NONEON-NOSVE-NEXT:    ret
  %load = load <8 x half>, ptr %a
  ret <8 x half> %load
}

define <16 x i16> @load_v16i16(ptr %a) {
; CHECK-LABEL: load_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q0, q1, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: load_v16i16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldp q0, q1, [x0]
; NONEON-NOSVE-NEXT:    ret
  %load = load <16 x i16>, ptr %a
  ret <16 x i16> %load
}

define <16 x half> @load_v16f16(ptr %a) {
; CHECK-LABEL: load_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q0, q1, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: load_v16f16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldp q0, q1, [x0]
; NONEON-NOSVE-NEXT:    ret
  %load = load <16 x half>, ptr %a
  ret <16 x half> %load
}

; ---- 32-bit elements (integer and float) ----

define <2 x i32> @load_v2i32(ptr %a) {
; CHECK-LABEL: load_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: load_v2i32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldr d0, [x0]
; NONEON-NOSVE-NEXT:    ret
  %load = load <2 x i32>, ptr %a
  ret <2 x i32> %load
}

define <2 x float> @load_v2f32(ptr %a) {
; CHECK-LABEL: load_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: load_v2f32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldr d0, [x0]
; NONEON-NOSVE-NEXT:    ret
  %load = load <2 x float>, ptr %a
  ret <2 x float> %load
}

define <4 x i32> @load_v4i32(ptr %a) {
; CHECK-LABEL: load_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: load_v4i32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldr q0, [x0]
; NONEON-NOSVE-NEXT:    ret
  %load = load <4 x i32>, ptr %a
  ret <4 x i32> %load
}

define <4 x float> @load_v4f32(ptr %a) {
; CHECK-LABEL: load_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: load_v4f32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldr q0, [x0]
; NONEON-NOSVE-NEXT:    ret
  %load = load <4 x float>, ptr %a
  ret <4 x float> %load
}

define <8 x i32> @load_v8i32(ptr %a) {
; CHECK-LABEL: load_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q0, q1, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: load_v8i32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldp q0, q1, [x0]
; NONEON-NOSVE-NEXT:    ret
  %load = load <8 x i32>, ptr %a
  ret <8 x i32> %load
}

define <8 x float> @load_v8f32(ptr %a) {
; CHECK-LABEL: load_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q0, q1, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: load_v8f32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldp q0, q1, [x0]
; NONEON-NOSVE-NEXT:    ret
  %load = load <8 x float>, ptr %a
  ret <8 x float> %load
}

; ---- 64-bit elements (integer and double) ----

define <1 x i64> @load_v1i64(ptr %a) {
; CHECK-LABEL: load_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: load_v1i64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldr d0, [x0]
; NONEON-NOSVE-NEXT:    ret
  %load = load <1 x i64>, ptr %a
  ret <1 x i64> %load
}

define <1 x double> @load_v1f64(ptr %a) {
; CHECK-LABEL: load_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: load_v1f64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldr d0, [x0]
; NONEON-NOSVE-NEXT:    ret
  %load = load <1 x double>, ptr %a
  ret <1 x double> %load
}

define <2 x i64> @load_v2i64(ptr %a) {
; CHECK-LABEL: load_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: load_v2i64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldr q0, [x0]
; NONEON-NOSVE-NEXT:    ret
  %load = load <2 x i64>, ptr %a
  ret <2 x i64> %load
}

define <2 x double> @load_v2f64(ptr %a) {
; CHECK-LABEL: load_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: load_v2f64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldr q0, [x0]
; NONEON-NOSVE-NEXT:    ret
  %load = load <2 x double>, ptr %a
  ret <2 x double> %load
}

define <4 x i64> @load_v4i64(ptr %a) {
; CHECK-LABEL: load_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q0, q1, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: load_v4i64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldp q0, q1, [x0]
; NONEON-NOSVE-NEXT:    ret
  %load = load <4 x i64>, ptr %a
  ret <4 x i64> %load
}

define <4 x double> @load_v4f64(ptr %a) {
; CHECK-LABEL: load_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q0, q1, [x0]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: load_v4f64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldp q0, q1, [x0]
; NONEON-NOSVE-NEXT:    ret
  %load = load <4 x double>, ptr %a
  ret <4 x double> %load
}