; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -global-isel=1 -global-isel-abort=1 -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; Structured NEON loads of whole vectors. Every function below takes the
; address as its only argument and returns the loaded vectors as an aggregate,
; so the expected assembly reads from [x0] and writes consecutive registers
; starting at v0. The same file is compiled twice (see the two llc invocations
; above): once with the default selector and once with -global-isel=1.

;===----------------------------------------------------------------------===;
; <8 x i8>
;===----------------------------------------------------------------------===;

%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }

declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0(ptr) nounwind readonly
declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3.v8i8.p0(ptr) nounwind readonly
declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4.v8i8.p0(ptr) nounwind readonly

; The two results must land in the ABI return registers (v0, v1) and the
; address must be taken straight from the ABI argument register (x0).
define %struct.__neon_int8x8x2_t @ld2_8b(ptr %A) nounwind {
; CHECK-LABEL: ld2_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2.8b { v0, v1 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0(ptr %A)
  ret %struct.__neon_int8x8x2_t %vecs
}

; Three-register variant: results in v0-v2, address in x0.
define %struct.__neon_int8x8x3_t @ld3_8b(ptr %A) nounwind {
; CHECK-LABEL: ld3_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3.8b { v0, v1, v2 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3.v8i8.p0(ptr %A)
  ret %struct.__neon_int8x8x3_t %vecs
}

; Four-register variant: results in v0-v3, address in x0.
define %struct.__neon_int8x8x4_t @ld4_8b(ptr %A) nounwind {
; CHECK-LABEL: ld4_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4.8b { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4.v8i8.p0(ptr %A)
  ret %struct.__neon_int8x8x4_t %vecs
}

;===----------------------------------------------------------------------===;
; <16 x i8>
;===----------------------------------------------------------------------===;

%struct.__neon_int8x16x2_t = type { <16 x i8>, <16 x i8> }
%struct.__neon_int8x16x3_t = type { <16 x i8>, <16 x i8>, <16 x i8> }
%struct.__neon_int8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }

declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2.v16i8.p0(ptr) nounwind readonly
declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3.v16i8.p0(ptr) nounwind readonly
declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4.v16i8.p0(ptr) nounwind readonly

; Results in v0-v1, address in x0, as dictated by the ABI.
define %struct.__neon_int8x16x2_t @ld2_16b(ptr %A) nounwind {
; CHECK-LABEL: ld2_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2.16b { v0, v1 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2.v16i8.p0(ptr %A)
  ret %struct.__neon_int8x16x2_t %vecs
}

; Results in v0-v2, address in x0, as dictated by the ABI.
define %struct.__neon_int8x16x3_t @ld3_16b(ptr %A) nounwind {
; CHECK-LABEL: ld3_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3.16b { v0, v1, v2 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3.v16i8.p0(ptr %A)
  ret %struct.__neon_int8x16x3_t %vecs
}

; Results in v0-v3, address in x0, as dictated by the ABI.
define %struct.__neon_int8x16x4_t @ld4_16b(ptr %A) nounwind {
; CHECK-LABEL: ld4_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4.16b { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4.v16i8.p0(ptr %A)
  ret %struct.__neon_int8x16x4_t %vecs
}

;===----------------------------------------------------------------------===;
; <4 x i16>
;===----------------------------------------------------------------------===;

%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }

declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2.v4i16.p0(ptr) nounwind readonly
declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3.v4i16.p0(ptr) nounwind readonly
declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4.v4i16.p0(ptr) nounwind readonly

; Results in v0-v1, address in x0, as dictated by the ABI.
define %struct.__neon_int16x4x2_t @ld2_4h(ptr %A) nounwind {
; CHECK-LABEL: ld2_4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2.4h { v0, v1 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2.v4i16.p0(ptr %A)
  ret %struct.__neon_int16x4x2_t %vecs
}

; Results in v0-v2, address in x0, as dictated by the ABI.
define %struct.__neon_int16x4x3_t @ld3_4h(ptr %A) nounwind {
; CHECK-LABEL: ld3_4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3.4h { v0, v1, v2 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3.v4i16.p0(ptr %A)
  ret %struct.__neon_int16x4x3_t %vecs
}

; Results in v0-v3, address in x0, as dictated by the ABI.
define %struct.__neon_int16x4x4_t @ld4_4h(ptr %A) nounwind {
; CHECK-LABEL: ld4_4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4.4h { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4.v4i16.p0(ptr %A)
  ret %struct.__neon_int16x4x4_t %vecs
}

;===----------------------------------------------------------------------===;
; <8 x i16>
;===----------------------------------------------------------------------===;

%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }

declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2.v8i16.p0(ptr) nounwind readonly
declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3.v8i16.p0(ptr) nounwind readonly
declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4.v8i16.p0(ptr) nounwind readonly

; Results in v0-v1, address in x0, as dictated by the ABI.
define %struct.__neon_int16x8x2_t @ld2_8h(ptr %A) nounwind {
; CHECK-LABEL: ld2_8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2.8h { v0, v1 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2.v8i16.p0(ptr %A)
  ret %struct.__neon_int16x8x2_t %vecs
}

; Results in v0-v2, address in x0, as dictated by the ABI.
define %struct.__neon_int16x8x3_t @ld3_8h(ptr %A) nounwind {
; CHECK-LABEL: ld3_8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3.8h { v0, v1, v2 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3.v8i16.p0(ptr %A)
  ret %struct.__neon_int16x8x3_t %vecs
}

; Results in v0-v3, address in x0, as dictated by the ABI.
define %struct.__neon_int16x8x4_t @ld4_8h(ptr %A) nounwind {
; CHECK-LABEL: ld4_8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4.8h { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4.v8i16.p0(ptr %A)
  ret %struct.__neon_int16x8x4_t %vecs
}

;===----------------------------------------------------------------------===;
; <2 x i32>
;===----------------------------------------------------------------------===;

%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }

declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2.v2i32.p0(ptr) nounwind readonly
declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3.v2i32.p0(ptr) nounwind readonly
declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4.v2i32.p0(ptr) nounwind readonly

; Results in v0-v1, address in x0, as dictated by the ABI.
define %struct.__neon_int32x2x2_t @ld2_2s(ptr %A) nounwind {
; CHECK-LABEL: ld2_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2.2s { v0, v1 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2.v2i32.p0(ptr %A)
  ret %struct.__neon_int32x2x2_t %vecs
}

; Results in v0-v2, address in x0, as dictated by the ABI.
define %struct.__neon_int32x2x3_t @ld3_2s(ptr %A) nounwind {
; CHECK-LABEL: ld3_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3.2s { v0, v1, v2 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3.v2i32.p0(ptr %A)
  ret %struct.__neon_int32x2x3_t %vecs
}

; Results in v0-v3, address in x0, as dictated by the ABI.
define %struct.__neon_int32x2x4_t @ld4_2s(ptr %A) nounwind {
; CHECK-LABEL: ld4_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4.2s { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4.v2i32.p0(ptr %A)
  ret %struct.__neon_int32x2x4_t %vecs
}

;===----------------------------------------------------------------------===;
; <4 x i32>
;===----------------------------------------------------------------------===;

%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }

declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2.v4i32.p0(ptr) nounwind readonly
declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3.v4i32.p0(ptr) nounwind readonly
declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4.v4i32.p0(ptr) nounwind readonly

; Results in v0-v1, address in x0, as dictated by the ABI.
define %struct.__neon_int32x4x2_t @ld2_4s(ptr %A) nounwind {
; CHECK-LABEL: ld2_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2.4s { v0, v1 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2.v4i32.p0(ptr %A)
  ret %struct.__neon_int32x4x2_t %vecs
}

; Results in v0-v2, address in x0, as dictated by the ABI.
define %struct.__neon_int32x4x3_t @ld3_4s(ptr %A) nounwind {
; CHECK-LABEL: ld3_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3.4s { v0, v1, v2 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3.v4i32.p0(ptr %A)
  ret %struct.__neon_int32x4x3_t %vecs
}

; Results in v0-v3, address in x0, as dictated by the ABI.
define %struct.__neon_int32x4x4_t @ld4_4s(ptr %A) nounwind {
; CHECK-LABEL: ld4_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4.4s { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4.v4i32.p0(ptr %A)
  ret %struct.__neon_int32x4x4_t %vecs
}

;===----------------------------------------------------------------------===;
; <2 x i64>
;===----------------------------------------------------------------------===;

%struct.__neon_int64x2x2_t = type { <2 x i64>, <2 x i64> }
%struct.__neon_int64x2x3_t = type { <2 x i64>, <2 x i64>, <2 x i64> }
%struct.__neon_int64x2x4_t = type { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }

declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2.v2i64.p0(ptr) nounwind readonly
declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3.v2i64.p0(ptr) nounwind readonly
declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4.v2i64.p0(ptr) nounwind readonly

; Results in v0-v1, address in x0, as dictated by the ABI.
define %struct.__neon_int64x2x2_t @ld2_2d(ptr %A) nounwind {
; CHECK-LABEL: ld2_2d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2.2d { v0, v1 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2.v2i64.p0(ptr %A)
  ret %struct.__neon_int64x2x2_t %vecs
}

; Results in v0-v2, address in x0, as dictated by the ABI.
define %struct.__neon_int64x2x3_t @ld3_2d(ptr %A) nounwind {
; CHECK-LABEL: ld3_2d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3.2d { v0, v1, v2 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3.v2i64.p0(ptr %A)
  ret %struct.__neon_int64x2x3_t %vecs
}

; Results in v0-v3, address in x0, as dictated by the ABI.
define %struct.__neon_int64x2x4_t @ld4_2d(ptr %A) nounwind {
; CHECK-LABEL: ld4_2d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4.2d { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4.v2i64.p0(ptr %A)
  ret %struct.__neon_int64x2x4_t %vecs
}

;===----------------------------------------------------------------------===;
; <1 x i64> aggregate types (used by the single-element tests that follow)
;===----------------------------------------------------------------------===;

%struct.__neon_int64x1x2_t = type { <1 x i64>, <1 x i64> }
%struct.__neon_int64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> }
%struct.__neon_int64x1x4_t = type { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }

;===----------------------------------------------------------------------===;
; <1 x i64>: the assertions expect the multi-register ld1.1d form for the
; ld2/ld3/ld4 intrinsics on single-element vectors.
;===----------------------------------------------------------------------===;

declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2.v1i64.p0(ptr) nounwind readonly
declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3.v1i64.p0(ptr) nounwind readonly
declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4.v1i64.p0(ptr) nounwind readonly

; Results in v0-v1, address in x0, as dictated by the ABI.
define %struct.__neon_int64x1x2_t @ld2_1di64(ptr %A) nounwind {
; CHECK-LABEL: ld2_1di64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1.1d { v0, v1 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2.v1i64.p0(ptr %A)
  ret %struct.__neon_int64x1x2_t %vecs
}

; Results in v0-v2, address in x0, as dictated by the ABI.
define %struct.__neon_int64x1x3_t @ld3_1di64(ptr %A) nounwind {
; CHECK-LABEL: ld3_1di64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1.1d { v0, v1, v2 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3.v1i64.p0(ptr %A)
  ret %struct.__neon_int64x1x3_t %vecs
}

; Results in v0-v3, address in x0, as dictated by the ABI.
define %struct.__neon_int64x1x4_t @ld4_1di64(ptr %A) nounwind {
; CHECK-LABEL: ld4_1di64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4.v1i64.p0(ptr %A)
  ret %struct.__neon_int64x1x4_t %vecs
}

;===----------------------------------------------------------------------===;
; <1 x double>: same expectation as the <1 x i64> case above.
;===----------------------------------------------------------------------===;

%struct.__neon_float64x1x2_t = type { <1 x double>, <1 x double> }
%struct.__neon_float64x1x3_t = type { <1 x double>, <1 x double>, <1 x double> }
%struct.__neon_float64x1x4_t = type { <1 x double>, <1 x double>, <1 x double>, <1 x double> }

declare %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld2.v1f64.p0(ptr) nounwind readonly
declare %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld3.v1f64.p0(ptr) nounwind readonly
declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0(ptr) nounwind readonly

; Results in v0-v1, address in x0, as dictated by the ABI.
define %struct.__neon_float64x1x2_t @ld2_1df64(ptr %A) nounwind {
; CHECK-LABEL: ld2_1df64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1.1d { v0, v1 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld2.v1f64.p0(ptr %A)
  ret %struct.__neon_float64x1x2_t %vecs
}

; Results in v0-v2, address in x0, as dictated by the ABI.
define %struct.__neon_float64x1x3_t @ld3_1df64(ptr %A) nounwind {
; CHECK-LABEL: ld3_1df64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1.1d { v0, v1, v2 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld3.v1f64.p0(ptr %A)
  ret %struct.__neon_float64x1x3_t %vecs
}

; Results in v0-v3, address in x0, as dictated by the ABI.
define %struct.__neon_float64x1x4_t @ld4_1df64(ptr %A) nounwind {
; CHECK-LABEL: ld4_1df64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1.1d { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0(ptr %A)
  ret %struct.__neon_float64x1x4_t %vecs
}

;===----------------------------------------------------------------------===;
; Single-lane structured loads (lane index 1). The incoming vectors arrive in
; v0..vN and the address in x0. The two selectors are checked under separate
; prefixes because the order of their "kill" annotations differs.
;===----------------------------------------------------------------------===;

declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8>, <16 x i8>, i64, ptr) nounwind readonly
declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, i64, ptr) nounwind readonly
declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, ptr) nounwind readonly

; Operands and results must stay in the registers assigned by the ABI.
define %struct.__neon_int8x16x2_t @ld2lane_16b(<16 x i8> %L1, <16 x i8> %L2, ptr %A) nounwind {
; CHECK-SD-LABEL: ld2lane_16b:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ld2.b { v0, v1 }[1], [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: ld2lane_16b:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ld2.b { v0, v1 }[1], [x0]
; CHECK-GI-NEXT:    ret
  %vecs = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, i64 1, ptr %A)
  ret %struct.__neon_int8x16x2_t %vecs
}

; Operands and results must stay in the registers assigned by the ABI.
define %struct.__neon_int8x16x3_t @ld3lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, ptr %A) nounwind {
; CHECK-SD-LABEL: ld3lane_16b:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ld3.b { v0, v1, v2 }[1], [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: ld3lane_16b:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ld3.b { v0, v1, v2 }[1], [x0]
; CHECK-GI-NEXT:    ret
  %vecs = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, i64 1, ptr %A)
  ret %struct.__neon_int8x16x3_t %vecs
}

; Operands and results must stay in the registers assigned by the ABI.
define %struct.__neon_int8x16x4_t @ld4lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, ptr %A) nounwind {
; CHECK-SD-LABEL: ld4lane_16b:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ld4.b { v0, v1, v2, v3 }[1], [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: ld4lane_16b:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ld4.b { v0, v1, v2, v3 }[1], [x0]
; CHECK-GI-NEXT:    ret
  %vecs = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, i64 1, ptr %A)
  ret %struct.__neon_int8x16x4_t %vecs
}

declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16>, <8 x i16>, i64, ptr) nounwind readonly
declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, i64, ptr) nounwind readonly
declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, ptr) nounwind readonly

; Operands and results must stay in the registers assigned by the ABI.
define %struct.__neon_int16x8x2_t @ld2lane_8h(<8 x i16> %L1, <8 x i16> %L2, ptr %A) nounwind {
; CHECK-SD-LABEL: ld2lane_8h:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ld2.h { v0, v1 }[1], [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: ld2lane_8h:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ld2.h { v0, v1 }[1], [x0]
; CHECK-GI-NEXT:    ret
  %vecs = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, i64 1, ptr %A)
  ret %struct.__neon_int16x8x2_t %vecs
}

; Operands and results must stay in the registers assigned by the ABI.
define %struct.__neon_int16x8x3_t @ld3lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, ptr %A) nounwind {
; CHECK-SD-LABEL: ld3lane_8h:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ld3.h { v0, v1, v2 }[1], [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: ld3lane_8h:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ld3.h { v0, v1, v2 }[1], [x0]
; CHECK-GI-NEXT:    ret
  %vecs = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, i64 1, ptr %A)
  ret %struct.__neon_int16x8x3_t %vecs
}

; Operands and results must stay in the registers assigned by the ABI.
define %struct.__neon_int16x8x4_t @ld4lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, ptr %A) nounwind {
; CHECK-SD-LABEL: ld4lane_8h:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ld4.h { v0, v1, v2, v3 }[1], [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: ld4lane_8h:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ld4.h { v0, v1, v2, v3 }[1], [x0]
; CHECK-GI-NEXT:    ret
  %vecs = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, i64 1, ptr %A)
  ret %struct.__neon_int16x8x4_t %vecs
}

declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32>, <4 x i32>, i64, ptr) nounwind readonly
declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, i64, ptr) nounwind readonly
declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, ptr) nounwind readonly

; Operands and results must stay in the registers assigned by the ABI.
define %struct.__neon_int32x4x2_t @ld2lane_4s(<4 x i32> %L1, <4 x i32> %L2, ptr %A) nounwind {
; CHECK-SD-LABEL: ld2lane_4s:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ld2.s { v0, v1 }[1], [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: ld2lane_4s:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ld2.s { v0, v1 }[1], [x0]
; CHECK-GI-NEXT:    ret
  %vecs = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, i64 1, ptr %A)
  ret %struct.__neon_int32x4x2_t %vecs
}

; Operands and results must stay in the registers assigned by the ABI.
define %struct.__neon_int32x4x3_t @ld3lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, ptr %A) nounwind {
; CHECK-SD-LABEL: ld3lane_4s:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[1], [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: ld3lane_4s:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[1], [x0]
; CHECK-GI-NEXT:    ret
  %vecs = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 1, ptr %A)
  ret %struct.__neon_int32x4x3_t %vecs
}

; Operands and results must stay in the registers assigned by the ABI.
define %struct.__neon_int32x4x4_t @ld4lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, ptr %A) nounwind {
; CHECK-SD-LABEL: ld4lane_4s:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[1], [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: ld4lane_4s:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[1], [x0]
; CHECK-GI-NEXT:    ret
  %vecs = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, i64 1, ptr %A)
  ret %struct.__neon_int32x4x4_t %vecs
}

declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64>, <2 x i64>, i64, ptr) nounwind readonly
declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, i64, ptr) nounwind readonly
declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, ptr) nounwind readonly

; Operands and results must stay in the registers assigned by the ABI.
define %struct.__neon_int64x2x2_t @ld2lane_2d(<2 x i64> %L1, <2 x i64> %L2, ptr %A) nounwind {
; CHECK-SD-LABEL: ld2lane_2d:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ld2.d { v0, v1 }[1], [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: ld2lane_2d:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    ld2.d { v0, v1 }[1], [x0]
; CHECK-GI-NEXT:    ret
  %vecs = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, i64 1, ptr %A)
  ret %struct.__neon_int64x2x2_t %vecs
}

; Operands and results must stay in the registers assigned by the ABI.
define %struct.__neon_int64x2x3_t @ld3lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, ptr %A) nounwind {
; CHECK-SD-LABEL: ld3lane_2d:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[1], [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: ld3lane_2d:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[1], [x0]
; CHECK-GI-NEXT:    ret
  %vecs = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, i64 1, ptr %A)
  ret %struct.__neon_int64x2x3_t %vecs
}

; Operands and results must stay in the registers assigned by the ABI.
define %struct.__neon_int64x2x4_t @ld4lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, ptr %A) nounwind {
; CHECK-SD-LABEL: ld4lane_2d:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[1], [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: ld4lane_2d:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[1], [x0]
; CHECK-GI-NEXT:    ret
  %vecs = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, i64 1, ptr %A)
  ret %struct.__neon_int64x2x4_t %vecs
}

;===----------------------------------------------------------------------===;
; Load-and-replicate written as plain IR: one scalar load whose value is
; inserted into every lane of the result. Each function is expected to become
; a single ld1r from [x0] into v0.
;===----------------------------------------------------------------------===;

; Scalar i8 splatted across all 8 lanes.
define <8 x i8> @ld1r_8b(ptr %bar) {
; CHECK-LABEL: ld1r_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1r.8b { v0 }, [x0]
; CHECK-NEXT:    ret
  %elt = load i8, ptr %bar
  %ins0 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %elt, i32 0
  %ins1 = insertelement <8 x i8> %ins0, i8 %elt, i32 1
  %ins2 = insertelement <8 x i8> %ins1, i8 %elt, i32 2
  %ins3 = insertelement <8 x i8> %ins2, i8 %elt, i32 3
  %ins4 = insertelement <8 x i8> %ins3, i8 %elt, i32 4
  %ins5 = insertelement <8 x i8> %ins4, i8 %elt, i32 5
  %ins6 = insertelement <8 x i8> %ins5, i8 %elt, i32 6
  %ins7 = insertelement <8 x i8> %ins6, i8 %elt, i32 7
  ret <8 x i8> %ins7
}

; Scalar i8 splatted across all 16 lanes.
define <16 x i8> @ld1r_16b(ptr %bar) {
; CHECK-LABEL: ld1r_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1r.16b { v0 }, [x0]
; CHECK-NEXT:    ret
  %elt = load i8, ptr %bar
  %ins0 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %elt, i32 0
  %ins1 = insertelement <16 x i8> %ins0, i8 %elt, i32 1
  %ins2 = insertelement <16 x i8> %ins1, i8 %elt, i32 2
  %ins3 = insertelement <16 x i8> %ins2, i8 %elt, i32 3
  %ins4 = insertelement <16 x i8> %ins3, i8 %elt, i32 4
  %ins5 = insertelement <16 x i8> %ins4, i8 %elt, i32 5
  %ins6 = insertelement <16 x i8> %ins5, i8 %elt, i32 6
  %ins7 = insertelement <16 x i8> %ins6, i8 %elt, i32 7
  %ins8 = insertelement <16 x i8> %ins7, i8 %elt, i32 8
  %ins9 = insertelement <16 x i8> %ins8, i8 %elt, i32 9
  %ins10 = insertelement <16 x i8> %ins9, i8 %elt, i32 10
  %ins11 = insertelement <16 x i8> %ins10, i8 %elt, i32 11
  %ins12 = insertelement <16 x i8> %ins11, i8 %elt, i32 12
  %ins13 = insertelement <16 x i8> %ins12, i8 %elt, i32 13
  %ins14 = insertelement <16 x i8> %ins13, i8 %elt, i32 14
  %ins15 = insertelement <16 x i8> %ins14, i8 %elt, i32 15
  ret <16 x i8> %ins15
}

; Scalar i16 splatted across all 4 lanes.
define <4 x i16> @ld1r_4h(ptr %bar) {
; CHECK-LABEL: ld1r_4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1r.4h { v0 }, [x0]
; CHECK-NEXT:    ret
  %elt = load i16, ptr %bar
  %ins0 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %elt, i32 0
  %ins1 = insertelement <4 x i16> %ins0, i16 %elt, i32 1
  %ins2 = insertelement <4 x i16> %ins1, i16 %elt, i32 2
  %ins3 = insertelement <4 x i16> %ins2, i16 %elt, i32 3
  ret <4 x i16> %ins3
}

; Scalar i16 splatted across all 8 lanes.
define <8 x i16> @ld1r_8h(ptr %bar) {
; CHECK-LABEL: ld1r_8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1r.8h { v0 }, [x0]
; CHECK-NEXT:    ret
  %elt = load i16, ptr %bar
  %ins0 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %elt, i32 0
  %ins1 = insertelement <8 x i16> %ins0, i16 %elt, i32 1
  %ins2 = insertelement <8 x i16> %ins1, i16 %elt, i32 2
  %ins3 = insertelement <8 x i16> %ins2, i16 %elt, i32 3
  %ins4 = insertelement <8 x i16> %ins3, i16 %elt, i32 4
  %ins5 = insertelement <8 x i16> %ins4, i16 %elt, i32 5
  %ins6 = insertelement <8 x i16> %ins5, i16 %elt, i32 6
  %ins7 = insertelement <8 x i16> %ins6, i16 %elt, i32 7
  ret <8 x i16> %ins7
}

; Scalar i32 splatted across both lanes.
define <2 x i32> @ld1r_2s(ptr %bar) {
; CHECK-LABEL: ld1r_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1r.2s { v0 }, [x0]
; CHECK-NEXT:    ret
  %elt = load i32, ptr %bar
  %ins0 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %elt, i32 0
  %ins1 = insertelement <2 x i32> %ins0, i32 %elt, i32 1
  ret <2 x i32> %ins1
}

; Scalar i32 splatted across all 4 lanes.
define <4 x i32> @ld1r_4s(ptr %bar) {
; CHECK-LABEL: ld1r_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1r.4s { v0 }, [x0]
; CHECK-NEXT:    ret
  %elt = load i32, ptr %bar
  %ins0 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %elt, i32 0
  %ins1 = insertelement <4 x i32> %ins0, i32 %elt, i32 1
  %ins2 = insertelement <4 x i32> %ins1, i32 %elt, i32 2
  %ins3 = insertelement <4 x i32> %ins2, i32 %elt, i32 3
  ret <4 x i32> %ins3
}

; Scalar i64 splatted across both lanes.
define <2 x i64> @ld1r_2d(ptr %bar) {
; CHECK-LABEL: ld1r_2d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1r.2d { v0 }, [x0]
; CHECK-NEXT:    ret
  %elt = load i64, ptr %bar
  %ins0 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %elt, i32 0
  %ins1 = insertelement <2 x i64> %ins0, i64 %elt, i32 1
  ret <2 x i64> %ins1
}

;===----------------------------------------------------------------------===;
; Load-and-replicate structured loads (ld2r/ld3r/ld4r intrinsics).
;===----------------------------------------------------------------------===;

declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2r.v8i8.p0(ptr) nounwind readonly
declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3r.v8i8.p0(ptr) nounwind readonly
declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4r.v8i8.p0(ptr) nounwind readonly

; Results in v0-v1, address in x0, as dictated by the ABI.
define %struct.__neon_int8x8x2_t @ld2r_8b(ptr %A) nounwind {
; CHECK-LABEL: ld2r_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2r.8b { v0, v1 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2r.v8i8.p0(ptr %A)
  ret %struct.__neon_int8x8x2_t %vecs
}

; Results in v0-v2, address in x0, as dictated by the ABI.
define %struct.__neon_int8x8x3_t @ld3r_8b(ptr %A) nounwind {
; CHECK-LABEL: ld3r_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3r.8b { v0, v1, v2 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3r.v8i8.p0(ptr %A)
  ret %struct.__neon_int8x8x3_t %vecs
}

; Results in v0-v3, address in x0, as dictated by the ABI.
define %struct.__neon_int8x8x4_t @ld4r_8b(ptr %A) nounwind {
; CHECK-LABEL: ld4r_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4r.8b { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4r.v8i8.p0(ptr %A)
  ret %struct.__neon_int8x8x4_t %vecs
}

declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2r.v16i8.p0(ptr) nounwind readonly
declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3r.v16i8.p0(ptr) nounwind readonly
declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4r.v16i8.p0(ptr) nounwind readonly

; Results in v0-v1, address in x0, as dictated by the ABI.
define %struct.__neon_int8x16x2_t @ld2r_16b(ptr %A) nounwind {
; CHECK-LABEL: ld2r_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld2r.16b { v0, v1 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2r.v16i8.p0(ptr %A)
  ret %struct.__neon_int8x16x2_t %vecs
}

; Results in v0-v2, address in x0, as dictated by the ABI.
define %struct.__neon_int8x16x3_t @ld3r_16b(ptr %A) nounwind {
; CHECK-LABEL: ld3r_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld3r.16b { v0, v1, v2 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3r.v16i8.p0(ptr %A)
  ret %struct.__neon_int8x16x3_t %vecs
}

; Results in v0-v3, address in x0, as dictated by the ABI.
define %struct.__neon_int8x16x4_t @ld4r_16b(ptr %A) nounwind {
; CHECK-LABEL: ld4r_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4r.16b { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT:    ret
  %vecs = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4r.v16i8.p0(ptr %A)
  ret %struct.__neon_int8x16x4_t %vecs
}

; ld2r/ld3r/ld4r through the llvm.aarch64.neon.ldNr intrinsics, <4 x i16> results.
; Each function returns the intrinsic's struct result directly; the expected
; output is one ldNr writing consecutive registers starting at v0, addressed
; through x0.
define %struct.__neon_int16x4x2_t @ld2r_4h(ptr %A) nounwind {
; CHECK-LABEL: ld2r_4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ld2r.4h { v0, v1 }, [x0]
; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
  %tmp2 = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2r.v4i16.p0(ptr %A)
  ret %struct.__neon_int16x4x2_t %tmp2
}

define %struct.__neon_int16x4x3_t @ld3r_4h(ptr %A) nounwind {
; CHECK-LABEL: ld3r_4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ld3r.4h { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
  %tmp2 = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3r.v4i16.p0(ptr %A)
  ret %struct.__neon_int16x4x3_t %tmp2
}

define %struct.__neon_int16x4x4_t @ld4r_4h(ptr %A) nounwind {
; CHECK-LABEL: ld4r_4h:
; CHECK: // %bb.0:
; CHECK-NEXT: ld4r.4h { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
  %tmp2 = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4r.v4i16.p0(ptr %A)
  ret %struct.__neon_int16x4x4_t %tmp2
}

; Intrinsic declarations used by the three <4 x i16> ldNr tests above.
declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2r.v4i16.p0(ptr) nounwind readonly
declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3r.v4i16.p0(ptr) nounwind readonly
declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4r.v4i16.p0(ptr) nounwind readonly

; ld2r/ld3r/ld4r intrinsics, <8 x i16> results.
define %struct.__neon_int16x8x2_t @ld2r_8h(ptr %A) nounwind {
; CHECK-LABEL: ld2r_8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ld2r.8h { v0, v1 }, [x0]
; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
  %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2r.v8i16.p0(ptr %A)
  ret %struct.__neon_int16x8x2_t %tmp2
}

define %struct.__neon_int16x8x3_t @ld3r_8h(ptr %A) nounwind {
; CHECK-LABEL: ld3r_8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ld3r.8h { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
  %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3r.v8i16.p0(ptr %A)
  ret %struct.__neon_int16x8x3_t %tmp2
}

define %struct.__neon_int16x8x4_t @ld4r_8h(ptr %A) nounwind {
; CHECK-LABEL: ld4r_8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ld4r.8h { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
  %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4r.v8i16.p0(ptr %A)
  ret %struct.__neon_int16x8x4_t %tmp2
}

; Intrinsic declarations used by the three <8 x i16> ldNr tests above.
declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2r.v8i16.p0(ptr) nounwind readonly
declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3r.v8i16.p0(ptr) nounwind readonly
declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4r.v8i16.p0(ptr) nounwind readonly

; ld2r/ld3r/ld4r intrinsics, <2 x i32> results.
define %struct.__neon_int32x2x2_t @ld2r_2s(ptr %A) nounwind {
; CHECK-LABEL: ld2r_2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ld2r.2s { v0, v1 }, [x0]
; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
  %tmp2 = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2r.v2i32.p0(ptr %A)
  ret %struct.__neon_int32x2x2_t %tmp2
}

define %struct.__neon_int32x2x3_t @ld3r_2s(ptr %A) nounwind {
; CHECK-LABEL: ld3r_2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ld3r.2s { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
  %tmp2 = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3r.v2i32.p0(ptr %A)
  ret %struct.__neon_int32x2x3_t %tmp2
}

define %struct.__neon_int32x2x4_t @ld4r_2s(ptr %A) nounwind {
; CHECK-LABEL: ld4r_2s:
; CHECK: // %bb.0:
; CHECK-NEXT: ld4r.2s { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
  %tmp2 = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4r.v2i32.p0(ptr %A)
  ret %struct.__neon_int32x2x4_t %tmp2
}

; Intrinsic declarations used by the three <2 x i32> ldNr tests above.
declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2r.v2i32.p0(ptr) nounwind readonly
declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3r.v2i32.p0(ptr) nounwind readonly
declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4r.v2i32.p0(ptr) nounwind readonly

; ld2r/ld3r/ld4r intrinsics, <4 x i32> results.
define %struct.__neon_int32x4x2_t @ld2r_4s(ptr %A) nounwind {
; CHECK-LABEL: ld2r_4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ld2r.4s { v0, v1 }, [x0]
; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
  %tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2r.v4i32.p0(ptr %A)
  ret %struct.__neon_int32x4x2_t %tmp2
}

define %struct.__neon_int32x4x3_t @ld3r_4s(ptr %A) nounwind {
; CHECK-LABEL: ld3r_4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ld3r.4s { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
  %tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3r.v4i32.p0(ptr %A)
  ret %struct.__neon_int32x4x3_t %tmp2
}

define %struct.__neon_int32x4x4_t @ld4r_4s(ptr %A) nounwind {
; CHECK-LABEL: ld4r_4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ld4r.4s { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
  %tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4r.v4i32.p0(ptr %A)
  ret %struct.__neon_int32x4x4_t %tmp2
}

; Intrinsic declarations used by the three <4 x i32> ldNr tests above.
declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2r.v4i32.p0(ptr) nounwind readonly
declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3r.v4i32.p0(ptr) nounwind readonly
declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4r.v4i32.p0(ptr) nounwind readonly

; ld2r/ld3r/ld4r intrinsics, <1 x i64> results.
define %struct.__neon_int64x1x2_t @ld2r_1d(ptr %A) nounwind {
; CHECK-LABEL: ld2r_1d:
; CHECK: // %bb.0:
; CHECK-NEXT: ld2r.1d { v0, v1 }, [x0]
; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
  %tmp2 = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2r.v1i64.p0(ptr %A)
  ret %struct.__neon_int64x1x2_t %tmp2
}

define %struct.__neon_int64x1x3_t @ld3r_1d(ptr %A) nounwind {
; CHECK-LABEL: ld3r_1d:
; CHECK: // %bb.0:
; CHECK-NEXT: ld3r.1d { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
  %tmp2 = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3r.v1i64.p0(ptr %A)
  ret %struct.__neon_int64x1x3_t %tmp2
}

define %struct.__neon_int64x1x4_t @ld4r_1d(ptr %A) nounwind {
; CHECK-LABEL: ld4r_1d:
; CHECK: // %bb.0:
; CHECK-NEXT: ld4r.1d { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
  %tmp2 = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4r.v1i64.p0(ptr %A)
  ret %struct.__neon_int64x1x4_t %tmp2
}

; Intrinsic declarations used by the three <1 x i64> ldNr tests above.
declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2r.v1i64.p0(ptr) nounwind readonly
declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3r.v1i64.p0(ptr) nounwind readonly
declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4r.v1i64.p0(ptr) nounwind readonly

; ld2r/ld3r/ld4r intrinsics, <2 x i64> results.
define %struct.__neon_int64x2x2_t @ld2r_2d(ptr %A) nounwind {
; CHECK-LABEL: ld2r_2d:
; CHECK: // %bb.0:
; CHECK-NEXT: ld2r.2d { v0, v1 }, [x0]
; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
  %tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2r.v2i64.p0(ptr %A)
  ret %struct.__neon_int64x2x2_t %tmp2
}

define %struct.__neon_int64x2x3_t @ld3r_2d(ptr %A) nounwind {
; CHECK-LABEL: ld3r_2d:
; CHECK: // %bb.0:
; CHECK-NEXT: ld3r.2d { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
  %tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0(ptr %A)
  ret %struct.__neon_int64x2x3_t %tmp2
}

define %struct.__neon_int64x2x4_t @ld4r_2d(ptr %A) nounwind {
; CHECK-LABEL: ld4r_2d:
; CHECK: // %bb.0:
; CHECK-NEXT: ld4r.2d { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
  %tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0(ptr %A)
  ret %struct.__neon_int64x2x4_t %tmp2
}

; Intrinsic declarations used by the three <2 x i64> ldNr tests above.
declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2r.v2i64.p0(ptr) nounwind readonly
declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0(ptr) nounwind readonly
declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0(ptr) nounwind readonly

; ld1 single-lane loads, 128-bit vectors.
; Each function loads one scalar from %bar and inserts it into lane 0 of the
; incoming vector %V. The expected output is a lane load into v0[0] from [x0].
; For the i8 case the GlobalISel run instead expects a scalar ldr into b1
; followed by a lane mov.
define <16 x i8> @ld1_16b(<16 x i8> %V, ptr %bar) {
; CHECK-SD-LABEL: ld1_16b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: ld1.b { v0 }[0], [x0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ld1_16b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr b1, [x0]
; CHECK-GI-NEXT: mov.b v0[0], v1[0]
; CHECK-GI-NEXT: ret
; Make sure we are using the operands defined by the ABI
  %tmp1 = load i8, ptr %bar
  %tmp2 = insertelement <16 x i8> %V, i8 %tmp1, i32 0
  ret <16 x i8> %tmp2
}

define <8 x i16> @ld1_8h(<8 x i16> %V, ptr %bar) {
; CHECK-LABEL: ld1_8h:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1.h { v0 }[0], [x0]
; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
  %tmp1 = load i16, ptr %bar
  %tmp2 = insertelement <8 x i16> %V, i16 %tmp1, i32 0
  ret <8 x i16> %tmp2
}

define <4 x i32> @ld1_4s(<4 x i32> %V, ptr %bar) {
; CHECK-LABEL: ld1_4s:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1.s { v0 }[0], [x0]
; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
  %tmp1 = load i32, ptr %bar
  %tmp2 = insertelement <4 x i32> %V, i32 %tmp1, i32 0
  ret <4 x i32> %tmp2
}

; Float element: the expected instruction is the same ld1.s lane load as ld1_4s.
define <4 x float> @ld1_4s_float(<4 x float> %V, ptr %bar) {
; CHECK-LABEL: ld1_4s_float:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1.s { v0 }[0], [x0]
; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
  %tmp1 = load float, ptr %bar
  %tmp2 = insertelement <4 x float> %V, float %tmp1, i32 0
  ret <4 x float> %tmp2
}

define <2 x i64> @ld1_2d(<2 x i64> %V, ptr %bar) {
; CHECK-LABEL: ld1_2d:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1.d { v0 }[0], [x0]
; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
  %tmp1 = load i64, ptr %bar
  %tmp2 = insertelement <2 x i64> %V, i64 %tmp1, i32 0
  ret <2 x i64> %tmp2
}

; Double element: the expected instruction is the same ld1.d lane load as ld1_2d.
define <2 x double> @ld1_2d_double(<2 x double> %V, ptr %bar) {
; CHECK-LABEL: ld1_2d_double:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1.d { v0 }[0], [x0]
; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
  %tmp1 = load double, ptr %bar
  %tmp2 = insertelement <2 x double> %V, double %tmp1, i32 0
  ret <2 x double> %tmp2
}

; Whole-vector load of a single-element <1 x i64>; the expected output is a
; plain ldr into d0 rather than an ld1.
define <1 x i64> @ld1_1d(ptr %p) {
; CHECK-LABEL: ld1_1d:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
  %tmp = load <1 x i64>, ptr %p, align 8
  ret <1 x i64> %tmp
}

; ld1 single-lane loads, 64-bit vectors.
; Same lane-0 insert pattern as the 128-bit tests above. The expected output
; additionally carries "kill" annotations relating d0 to q0 around the lane
; operation. As with ld1_16b, the GlobalISel run expects ldr + mov.b for i8.
define <8 x i8> @ld1_8b(<8 x i8> %V, ptr %bar) {
; CHECK-SD-LABEL: ld1_8b:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: ld1.b { v0 }[0], [x0]
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ld1_8b:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr b1, [x0]
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: mov.b v0[0], v1[0]
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
; Make sure we are using the operands defined by the ABI
  %tmp1 = load i8, ptr %bar
  %tmp2 = insertelement <8 x i8> %V, i8 %tmp1, i32 0
  ret <8 x i8> %tmp2
}

define <4 x i16> @ld1_4h(<4 x i16> %V, ptr %bar) {
; CHECK-LABEL: ld1_4h:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: ld1.h { v0 }[0], [x0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
  %tmp1 = load i16, ptr %bar
  %tmp2 = insertelement <4 x i16> %V, i16 %tmp1, i32 0
  ret <4 x i16> %tmp2
}

define <2 x i32> @ld1_2s(<2 x i32> %V, ptr %bar) {
; CHECK-LABEL: ld1_2s:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: ld1.s { v0 }[0], [x0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
  %tmp1 = load i32, ptr %bar
  %tmp2 = insertelement <2 x i32> %V, i32 %tmp1, i32 0
  ret <2 x i32> %tmp2
}

define <2 x float> @ld1_2s_float(<2 x float> %V, ptr %bar) {
; CHECK-LABEL: ld1_2s_float:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: ld1.s { v0 }[0], [x0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
; Make sure we are using the operands defined by the ABI
  %tmp1 = load float, ptr %bar
  %tmp2 = insertelement <2 x float> %V, float %tmp1, i32 0
  ret <2 x float> %tmp2
}


; Add rdar://13098923 test case: vld1_dup_u32 doesn't generate ld1r.2s
; Two i32 loads are each splatted to <2 x i32> (insertelement + shufflevector
; with an all-zero mask), reinterpreted as <8 x i8>, zero-extended to
; <8 x i16> and subtracted; the low 64 bits of the difference are stored to
; %diff. The two runs expect different load sequences here: plain "ldr s"
; scalar loads under SelectionDAG, ld1r.2s under GlobalISel.
define void @ld1r_2s_from_dup(ptr nocapture %a, ptr nocapture %b, ptr nocapture %diff) nounwind ssp {
; CHECK-SD-LABEL: ld1r_2s_from_dup:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: ldr s0, [x0]
; CHECK-SD-NEXT: ldr s1, [x1]
; CHECK-SD-NEXT: usubl.8h v0, v0, v1
; CHECK-SD-NEXT: str d0, [x2]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: ld1r_2s_from_dup:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ld1r.2s { v0 }, [x0]
; CHECK-GI-NEXT: ld1r.2s { v1 }, [x1]
; CHECK-GI-NEXT: usubl.8h v0, v0, v1
; CHECK-GI-NEXT: str d0, [x2]
; CHECK-GI-NEXT: ret
entry:
  %tmp1 = load i32, ptr %a, align 4
  %tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0
  %lane = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer
  %tmp3 = bitcast <2 x i32> %lane to <8 x i8>
  %tmp5 = load i32, ptr %b, align 4
  %tmp6 = insertelement <2 x i32> undef, i32 %tmp5, i32 0
  %lane1 = shufflevector <2 x i32> %tmp6, <2 x i32> undef, <2 x i32> zeroinitializer
  %tmp7 = bitcast <2 x i32> %lane1 to <8 x i8>
  %vmovl.i.i = zext <8 x i8> %tmp3 to <8 x i16>
  %vmovl.i4.i = zext <8 x i8> %tmp7 to <8 x i16>
  %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i4.i
  %tmp8 = bitcast <8 x i16> %sub.i to <2 x i64>
  %shuffle.i = shufflevector <2 x i64> %tmp8, <2 x i64> undef, <1 x i32> zeroinitializer
  %tmp9 = bitcast <1 x i64> %shuffle.i to <4 x i16>
  store <4 x i16> %tmp9, ptr %diff, align 8
  ret void
}

; Tests for rdar://11947069: vld1_dup_* and vld1q_dup_* code gen is suboptimal
; Floating-point splats built with an insertelement chain; the expected
; output is a single ld1r of the matching arrangement.
define <4 x float> @ld1r_4s_float(ptr nocapture %x) {
; CHECK-LABEL: ld1r_4s_float:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ld1r.4s { v0 }, [x0]
; CHECK-NEXT: ret
entry:
; Make sure we are using the operands defined by the ABI
  %tmp = load float, ptr %x, align 4
  %tmp1 = insertelement <4 x float> undef, float %tmp, i32 0
  %tmp2 = insertelement <4 x float> %tmp1, float %tmp, i32 1
  %tmp3 = insertelement <4 x float> %tmp2, float %tmp, i32 2
  %tmp4 = insertelement <4 x float> %tmp3, float %tmp, i32 3
  ret <4 x float> %tmp4
}

define <2 x float> @ld1r_2s_float(ptr nocapture %x) {
; CHECK-LABEL: ld1r_2s_float:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ld1r.2s { v0 }, [x0]
; CHECK-NEXT: ret
entry:
; Make sure we are using the operands defined by the ABI
  %tmp = load float, ptr %x, align 4
  %tmp1 = insertelement <2 x float> undef, float %tmp, i32 0
  %tmp2 = insertelement <2 x float> %tmp1, float %tmp, i32 1
  ret <2 x float> %tmp2
}

; NOTE(review): the double loads in this and the following double tests use
; "align 4", below the 8-byte size of the element; presumably deliberate,
; confirm before changing.
define <2 x double> @ld1r_2d_double(ptr nocapture %x) {
; CHECK-LABEL: ld1r_2d_double:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ld1r.2d { v0 }, [x0]
; CHECK-NEXT: ret
entry:
; Make sure we are using the operands defined by the ABI
  %tmp = load double, ptr %x, align 4
  %tmp1 = insertelement <2 x double> undef, double %tmp, i32 0
  %tmp2 = insertelement <2 x double> %tmp1, double %tmp, i32 1
  ret <2 x double> %tmp2
}

; Single-element vector: there is nothing to replicate, and the expected
; output is a plain ldr into d0.
define <1 x double> @ld1r_1d_double(ptr nocapture %x) {
; CHECK-LABEL: ld1r_1d_double:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ret
entry:
; Make sure we are using the operands defined by the ABI
  %tmp = load double, ptr %x, align 4
  %tmp1 = insertelement <1 x double> undef, double %tmp, i32 0
  ret <1 x double> %tmp1
}

; The *_shuff variants express the splat as one insertelement into lane 0
; followed by a shufflevector with an all-zero mask, instead of an
; insertelement per lane. The expected output matches the non-shuffle tests.
define <4 x float> @ld1r_4s_float_shuff(ptr nocapture %x) {
; CHECK-LABEL: ld1r_4s_float_shuff:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ld1r.4s { v0 }, [x0]
; CHECK-NEXT: ret
entry:
; Make sure we are using the operands defined by the ABI
  %tmp = load float, ptr %x, align 4
  %tmp1 = insertelement <4 x float> undef, float %tmp, i32 0
  %lane = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %lane
}

define <2 x float> @ld1r_2s_float_shuff(ptr nocapture %x) {
; CHECK-LABEL: ld1r_2s_float_shuff:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ld1r.2s { v0 }, [x0]
; CHECK-NEXT: ret
entry:
; Make sure we are using the operands defined by the ABI
  %tmp = load float, ptr %x, align 4
  %tmp1 = insertelement <2 x float> undef, float %tmp, i32 0
  %lane = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
  ret <2 x float> %lane
}

define <2 x double> @ld1r_2d_double_shuff(ptr nocapture %x) {
; CHECK-LABEL: ld1r_2d_double_shuff:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ld1r.2d { v0 }, [x0]
; CHECK-NEXT: ret
entry:
; Make sure we are using the operands defined by the ABI
  %tmp = load double, ptr %x, align 4
  %tmp1 = insertelement <2 x double> undef, double %tmp, i32 0
  %lane = shufflevector <2 x double> %tmp1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %lane
}

define <1 x double> @ld1r_1d_double_shuff(ptr nocapture %x) {
; CHECK-LABEL: ld1r_1d_double_shuff:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ret
entry:
; Make sure we are using the operands defined by the ABI
  %tmp = load double, ptr %x, align 4
  %tmp1 = insertelement <1 x double> undef, double %tmp, i32 0
  %lane = shufflevector <1 x double> %tmp1, <1 x double> undef, <1 x i32> zeroinitializer
  ret <1 x double> %lane
}

; Struct types for 2/3/4-register results of <2 x float> vectors.
%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> }
%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> }
%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> }

; llvm.aarch64.neon.ld1x2 intrinsics for 64-bit vector types.
declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld1x2.v8i8.p0(ptr) nounwind readonly
declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld1x2.v4i16.p0(ptr) nounwind readonly
declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld1x2.v2i32.p0(ptr) nounwind readonly
declare %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld1x2.v2f32.p0(ptr) nounwind readonly
declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld1x2.v1i64.p0(ptr) nounwind readonly
declare %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld1x2.v1f64.p0(ptr) nounwind readonly

; ld1x2 on 64-bit vectors: each call is expected to select one ld1 with a
; two-register list { v0, v1 } addressed through x0. Float and integer
; elements of the same width share the same arrangement in the expected output.
define %struct.__neon_int8x8x2_t @ld1_x2_v8i8(ptr %addr) {
; CHECK-LABEL: ld1_x2_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1.8b { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %val = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld1x2.v8i8.p0(ptr %addr)
  ret %struct.__neon_int8x8x2_t %val
}

define %struct.__neon_int16x4x2_t @ld1_x2_v4i16(ptr %addr) {
; CHECK-LABEL: ld1_x2_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1.4h { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %val = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld1x2.v4i16.p0(ptr %addr)
  ret %struct.__neon_int16x4x2_t %val
}

define %struct.__neon_int32x2x2_t @ld1_x2_v2i32(ptr %addr) {
; CHECK-LABEL: ld1_x2_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1.2s { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %val = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld1x2.v2i32.p0(ptr %addr)
  ret %struct.__neon_int32x2x2_t %val
}

define %struct.__neon_float32x2x2_t @ld1_x2_v2f32(ptr %addr) {
; CHECK-LABEL: ld1_x2_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1.2s { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %val = call %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld1x2.v2f32.p0(ptr %addr)
  ret %struct.__neon_float32x2x2_t %val
}

define %struct.__neon_int64x1x2_t @ld1_x2_v1i64(ptr %addr) {
; CHECK-LABEL: ld1_x2_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1.1d { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %val = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld1x2.v1i64.p0(ptr %addr)
  ret %struct.__neon_int64x1x2_t %val
}

define %struct.__neon_float64x1x2_t @ld1_x2_v1f64(ptr %addr) {
; CHECK-LABEL: ld1_x2_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1.1d { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %val = call %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld1x2.v1f64.p0(ptr %addr)
  ret %struct.__neon_float64x1x2_t %val
}


; Struct types for 2/3/4-register results of <4 x float> and <2 x double> vectors.
%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> }
%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> }
%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> }

%struct.__neon_float64x2x2_t = type { <2 x double>, <2 x double> }
%struct.__neon_float64x2x3_t = type { <2 x double>, <2 x double>, <2 x double> }
%struct.__neon_float64x2x4_t = type { <2 x double>, <2 x double>, <2 x double>, <2 x double> }

; llvm.aarch64.neon.ld1x2 intrinsics for 128-bit vector types.
declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld1x2.v16i8.p0(ptr) nounwind readonly
declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld1x2.v8i16.p0(ptr) nounwind readonly
declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld1x2.v4i32.p0(ptr) nounwind readonly
declare %struct.__neon_float32x4x2_t @llvm.aarch64.neon.ld1x2.v4f32.p0(ptr) nounwind readonly
declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld1x2.v2i64.p0(ptr) nounwind readonly
declare %struct.__neon_float64x2x2_t @llvm.aarch64.neon.ld1x2.v2f64.p0(ptr) nounwind readonly

; ld1x2 on 128-bit vectors: same expectation as the 64-bit tests, with the
; 128-bit arrangements.
define %struct.__neon_int8x16x2_t @ld1_x2_v16i8(ptr %addr) {
; CHECK-LABEL: ld1_x2_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1.16b { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %val = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld1x2.v16i8.p0(ptr %addr)
  ret %struct.__neon_int8x16x2_t %val
}

define %struct.__neon_int16x8x2_t @ld1_x2_v8i16(ptr %addr) {
; CHECK-LABEL: ld1_x2_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1.8h { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %val = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld1x2.v8i16.p0(ptr %addr)
  ret %struct.__neon_int16x8x2_t %val
}

define %struct.__neon_int32x4x2_t @ld1_x2_v4i32(ptr %addr) {
; CHECK-LABEL: ld1_x2_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1.4s { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %val = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld1x2.v4i32.p0(ptr %addr)
  ret %struct.__neon_int32x4x2_t %val
}

define %struct.__neon_float32x4x2_t @ld1_x2_v4f32(ptr %addr) {
; CHECK-LABEL: ld1_x2_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1.4s { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %val = call %struct.__neon_float32x4x2_t @llvm.aarch64.neon.ld1x2.v4f32.p0(ptr %addr)
  ret %struct.__neon_float32x4x2_t %val
}

define %struct.__neon_int64x2x2_t @ld1_x2_v2i64(ptr %addr) {
; CHECK-LABEL: ld1_x2_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1.2d { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %val = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld1x2.v2i64.p0(ptr %addr)
  ret %struct.__neon_int64x2x2_t %val
}

define %struct.__neon_float64x2x2_t @ld1_x2_v2f64(ptr %addr) {
; CHECK-LABEL: ld1_x2_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1.2d { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %val = call %struct.__neon_float64x2x2_t @llvm.aarch64.neon.ld1x2.v2f64.p0(ptr %addr)
  ret %struct.__neon_float64x2x2_t %val
}

; llvm.aarch64.neon.ld1x3 intrinsics for 64-bit vector types.
declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld1x3.v8i8.p0(ptr) nounwind readonly
declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld1x3.v4i16.p0(ptr) nounwind readonly
declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld1x3.v2i32.p0(ptr) nounwind readonly
declare %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld1x3.v2f32.p0(ptr) nounwind readonly
declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld1x3.v1i64.p0(ptr) nounwind readonly
declare %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld1x3.v1f64.p0(ptr) nounwind readonly

; ld1x3 on 64-bit vectors: each call is expected to select one ld1 with a
; three-register list { v0, v1, v2 } addressed through x0.
define %struct.__neon_int8x8x3_t @ld1_x3_v8i8(ptr %addr) {
; CHECK-LABEL: ld1_x3_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1.8b { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %val = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld1x3.v8i8.p0(ptr %addr)
  ret %struct.__neon_int8x8x3_t %val
}

define %struct.__neon_int16x4x3_t @ld1_x3_v4i16(ptr %addr) {
; CHECK-LABEL: ld1_x3_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1.4h { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %val = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld1x3.v4i16.p0(ptr %addr)
  ret %struct.__neon_int16x4x3_t %val
}

define %struct.__neon_int32x2x3_t @ld1_x3_v2i32(ptr %addr) {
; CHECK-LABEL: ld1_x3_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1.2s { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %val = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld1x3.v2i32.p0(ptr %addr)
  ret %struct.__neon_int32x2x3_t %val
}

define %struct.__neon_float32x2x3_t @ld1_x3_v2f32(ptr %addr) {
; CHECK-LABEL: ld1_x3_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1.2s { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %val = call %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld1x3.v2f32.p0(ptr %addr)
  ret %struct.__neon_float32x2x3_t %val
}

define %struct.__neon_int64x1x3_t @ld1_x3_v1i64(ptr %addr) {
; CHECK-LABEL: ld1_x3_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1.1d { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %val = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld1x3.v1i64.p0(ptr %addr)
  ret %struct.__neon_int64x1x3_t %val
}

define %struct.__neon_float64x1x3_t @ld1_x3_v1f64(ptr %addr) {
; CHECK-LABEL: ld1_x3_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ld1.1d { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %val = call %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld1x3.v1f64.p0(ptr %addr)
  ret %struct.__neon_float64x1x3_t %val
}

; llvm.aarch64.neon.ld1x3 intrinsics for 128-bit vector types.
declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld1x3.v16i8.p0(ptr) nounwind readonly
declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld1x3.v8i16.p0(ptr) nounwind readonly
declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld1x3.v4i32.p0(ptr) nounwind readonly
declare %struct.__neon_float32x4x3_t @llvm.aarch64.neon.ld1x3.v4f32.p0(ptr) nounwind readonly
declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld1x3.v2i64.p0(ptr) nounwind readonly
declare %struct.__neon_float64x2x3_t @llvm.aarch64.neon.ld1x3.v2f64.p0(ptr) nounwind
readonly 1493 1494define %struct.__neon_int8x16x3_t @ld1_x3_v16i8(ptr %addr) { 1495; CHECK-LABEL: ld1_x3_v16i8: 1496; CHECK: // %bb.0: 1497; CHECK-NEXT: ld1.16b { v0, v1, v2 }, [x0] 1498; CHECK-NEXT: ret 1499 %val = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld1x3.v16i8.p0(ptr %addr) 1500 ret %struct.__neon_int8x16x3_t %val 1501} 1502 1503define %struct.__neon_int16x8x3_t @ld1_x3_v8i16(ptr %addr) { 1504; CHECK-LABEL: ld1_x3_v8i16: 1505; CHECK: // %bb.0: 1506; CHECK-NEXT: ld1.8h { v0, v1, v2 }, [x0] 1507; CHECK-NEXT: ret 1508 %val = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld1x3.v8i16.p0(ptr %addr) 1509 ret %struct.__neon_int16x8x3_t %val 1510} 1511 1512define %struct.__neon_int32x4x3_t @ld1_x3_v4i32(ptr %addr) { 1513; CHECK-LABEL: ld1_x3_v4i32: 1514; CHECK: // %bb.0: 1515; CHECK-NEXT: ld1.4s { v0, v1, v2 }, [x0] 1516; CHECK-NEXT: ret 1517 %val = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld1x3.v4i32.p0(ptr %addr) 1518 ret %struct.__neon_int32x4x3_t %val 1519} 1520 1521define %struct.__neon_float32x4x3_t @ld1_x3_v4f32(ptr %addr) { 1522; CHECK-LABEL: ld1_x3_v4f32: 1523; CHECK: // %bb.0: 1524; CHECK-NEXT: ld1.4s { v0, v1, v2 }, [x0] 1525; CHECK-NEXT: ret 1526 %val = call %struct.__neon_float32x4x3_t @llvm.aarch64.neon.ld1x3.v4f32.p0(ptr %addr) 1527 ret %struct.__neon_float32x4x3_t %val 1528} 1529 1530define %struct.__neon_int64x2x3_t @ld1_x3_v2i64(ptr %addr) { 1531; CHECK-LABEL: ld1_x3_v2i64: 1532; CHECK: // %bb.0: 1533; CHECK-NEXT: ld1.2d { v0, v1, v2 }, [x0] 1534; CHECK-NEXT: ret 1535 %val = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld1x3.v2i64.p0(ptr %addr) 1536 ret %struct.__neon_int64x2x3_t %val 1537} 1538 1539define %struct.__neon_float64x2x3_t @ld1_x3_v2f64(ptr %addr) { 1540; CHECK-LABEL: ld1_x3_v2f64: 1541; CHECK: // %bb.0: 1542; CHECK-NEXT: ld1.2d { v0, v1, v2 }, [x0] 1543; CHECK-NEXT: ret 1544 %val = call %struct.__neon_float64x2x3_t @llvm.aarch64.neon.ld1x3.v2f64.p0(ptr %addr) 1545 ret %struct.__neon_float64x2x3_t %val 
1546} 1547 1548declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld1x4.v8i8.p0(ptr) nounwind readonly 1549declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld1x4.v4i16.p0(ptr) nounwind readonly 1550declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld1x4.v2i32.p0(ptr) nounwind readonly 1551declare %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld1x4.v2f32.p0(ptr) nounwind readonly 1552declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld1x4.v1i64.p0(ptr) nounwind readonly 1553declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld1x4.v1f64.p0(ptr) nounwind readonly 1554 1555define %struct.__neon_int8x8x4_t @ld1_x4_v8i8(ptr %addr) { 1556; CHECK-LABEL: ld1_x4_v8i8: 1557; CHECK: // %bb.0: 1558; CHECK-NEXT: ld1.8b { v0, v1, v2, v3 }, [x0] 1559; CHECK-NEXT: ret 1560 %val = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld1x4.v8i8.p0(ptr %addr) 1561 ret %struct.__neon_int8x8x4_t %val 1562} 1563 1564define %struct.__neon_int16x4x4_t @ld1_x4_v4i16(ptr %addr) { 1565; CHECK-LABEL: ld1_x4_v4i16: 1566; CHECK: // %bb.0: 1567; CHECK-NEXT: ld1.4h { v0, v1, v2, v3 }, [x0] 1568; CHECK-NEXT: ret 1569 %val = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld1x4.v4i16.p0(ptr %addr) 1570 ret %struct.__neon_int16x4x4_t %val 1571} 1572 1573define %struct.__neon_int32x2x4_t @ld1_x4_v2i32(ptr %addr) { 1574; CHECK-LABEL: ld1_x4_v2i32: 1575; CHECK: // %bb.0: 1576; CHECK-NEXT: ld1.2s { v0, v1, v2, v3 }, [x0] 1577; CHECK-NEXT: ret 1578 %val = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld1x4.v2i32.p0(ptr %addr) 1579 ret %struct.__neon_int32x2x4_t %val 1580} 1581 1582define %struct.__neon_float32x2x4_t @ld1_x4_v2f32(ptr %addr) { 1583; CHECK-LABEL: ld1_x4_v2f32: 1584; CHECK: // %bb.0: 1585; CHECK-NEXT: ld1.2s { v0, v1, v2, v3 }, [x0] 1586; CHECK-NEXT: ret 1587 %val = call %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld1x4.v2f32.p0(ptr %addr) 1588 ret %struct.__neon_float32x2x4_t %val 1589} 1590 1591define %struct.__neon_int64x1x4_t @ld1_x4_v1i64(ptr %addr) { 
1592; CHECK-LABEL: ld1_x4_v1i64: 1593; CHECK: // %bb.0: 1594; CHECK-NEXT: ld1.1d { v0, v1, v2, v3 }, [x0] 1595; CHECK-NEXT: ret 1596 %val = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld1x4.v1i64.p0(ptr %addr) 1597 ret %struct.__neon_int64x1x4_t %val 1598} 1599 1600define %struct.__neon_float64x1x4_t @ld1_x4_v1f64(ptr %addr) { 1601; CHECK-LABEL: ld1_x4_v1f64: 1602; CHECK: // %bb.0: 1603; CHECK-NEXT: ld1.1d { v0, v1, v2, v3 }, [x0] 1604; CHECK-NEXT: ret 1605 %val = call %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld1x4.v1f64.p0(ptr %addr) 1606 ret %struct.__neon_float64x1x4_t %val 1607} 1608 1609declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld1x4.v16i8.p0(ptr) nounwind readonly 1610declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld1x4.v8i16.p0(ptr) nounwind readonly 1611declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld1x4.v4i32.p0(ptr) nounwind readonly 1612declare %struct.__neon_float32x4x4_t @llvm.aarch64.neon.ld1x4.v4f32.p0(ptr) nounwind readonly 1613declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld1x4.v2i64.p0(ptr) nounwind readonly 1614declare %struct.__neon_float64x2x4_t @llvm.aarch64.neon.ld1x4.v2f64.p0(ptr) nounwind readonly 1615 1616define %struct.__neon_int8x16x4_t @ld1_x4_v16i8(ptr %addr) { 1617; CHECK-LABEL: ld1_x4_v16i8: 1618; CHECK: // %bb.0: 1619; CHECK-NEXT: ld1.16b { v0, v1, v2, v3 }, [x0] 1620; CHECK-NEXT: ret 1621 %val = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld1x4.v16i8.p0(ptr %addr) 1622 ret %struct.__neon_int8x16x4_t %val 1623} 1624 1625define %struct.__neon_int16x8x4_t @ld1_x4_v8i16(ptr %addr) { 1626; CHECK-LABEL: ld1_x4_v8i16: 1627; CHECK: // %bb.0: 1628; CHECK-NEXT: ld1.8h { v0, v1, v2, v3 }, [x0] 1629; CHECK-NEXT: ret 1630 %val = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld1x4.v8i16.p0(ptr %addr) 1631 ret %struct.__neon_int16x8x4_t %val 1632} 1633 1634define %struct.__neon_int32x4x4_t @ld1_x4_v4i32(ptr %addr) { 1635; CHECK-LABEL: ld1_x4_v4i32: 1636; CHECK: // %bb.0: 1637; 
CHECK-NEXT: ld1.4s { v0, v1, v2, v3 }, [x0] 1638; CHECK-NEXT: ret 1639 %val = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld1x4.v4i32.p0(ptr %addr) 1640 ret %struct.__neon_int32x4x4_t %val 1641} 1642 1643define %struct.__neon_float32x4x4_t @ld1_x4_v4f32(ptr %addr) { 1644; CHECK-LABEL: ld1_x4_v4f32: 1645; CHECK: // %bb.0: 1646; CHECK-NEXT: ld1.4s { v0, v1, v2, v3 }, [x0] 1647; CHECK-NEXT: ret 1648 %val = call %struct.__neon_float32x4x4_t @llvm.aarch64.neon.ld1x4.v4f32.p0(ptr %addr) 1649 ret %struct.__neon_float32x4x4_t %val 1650} 1651 1652define %struct.__neon_int64x2x4_t @ld1_x4_v2i64(ptr %addr) { 1653; CHECK-LABEL: ld1_x4_v2i64: 1654; CHECK: // %bb.0: 1655; CHECK-NEXT: ld1.2d { v0, v1, v2, v3 }, [x0] 1656; CHECK-NEXT: ret 1657 %val = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld1x4.v2i64.p0(ptr %addr) 1658 ret %struct.__neon_int64x2x4_t %val 1659} 1660 1661define %struct.__neon_float64x2x4_t @ld1_x4_v2f64(ptr %addr) { 1662; CHECK-LABEL: ld1_x4_v2f64: 1663; CHECK: // %bb.0: 1664; CHECK-NEXT: ld1.2d { v0, v1, v2, v3 }, [x0] 1665; CHECK-NEXT: ret 1666 %val = call %struct.__neon_float64x2x4_t @llvm.aarch64.neon.ld1x4.v2f64.p0(ptr %addr) 1667 ret %struct.__neon_float64x2x4_t %val 1668} 1669 1670define <8 x i8> @dup_ld1_from_stack(ptr %__ret) { 1671; CHECK-SD-LABEL: dup_ld1_from_stack: 1672; CHECK-SD: // %bb.0: // %entry 1673; CHECK-SD-NEXT: sub sp, sp, #16 1674; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 1675; CHECK-SD-NEXT: add x8, sp, #15 1676; CHECK-SD-NEXT: ld1r.8b { v0 }, [x8] 1677; CHECK-SD-NEXT: add sp, sp, #16 1678; CHECK-SD-NEXT: ret 1679; 1680; CHECK-GI-LABEL: dup_ld1_from_stack: 1681; CHECK-GI: // %bb.0: // %entry 1682; CHECK-GI-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill 1683; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 1684; CHECK-GI-NEXT: .cfi_offset w29, -16 1685; CHECK-GI-NEXT: add x8, sp, #15 1686; CHECK-GI-NEXT: ld1r.8b { v0 }, [x8] 1687; CHECK-GI-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload 1688; CHECK-GI-NEXT: ret 1689entry: 1690 %item = alloca i8, align 1 1691 %0 = load i8, ptr %item, align 1 1692 %1 = insertelement <8 x i8> poison, i8 %0, i32 0 1693 %lane = shufflevector <8 x i8> %1, <8 x i8> %1, <8 x i32> zeroinitializer 1694 ret <8 x i8> %lane 1695} 1696