; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s

target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "armv8-arm-none-eabi"

; Turning a vld1 intrinsic into an llvm load is beneficial
; when the underlying object being addressed comes from a
; constant, since we get constant-folding for free.

; Bail out of the optimization if the alignment is not a constant.
define <2 x i64> @vld1_align(ptr %ptr, i32 %align) {
; CHECK-LABEL: @vld1_align(
; CHECK-NEXT:    [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0(ptr [[PTR:%.*]], i32 [[ALIGN:%.*]])
; CHECK-NEXT:    ret <2 x i64> [[VLD1]]
;
  %vld1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0(ptr %ptr, i32 %align)
  ret <2 x i64> %vld1
}

; Bail out of the optimization if the alignment is not a power of 2.
define <2 x i64> @vld1_align_pow2(ptr %ptr) {
; CHECK-LABEL: @vld1_align_pow2(
; CHECK-NEXT:    [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0(ptr [[PTR:%.*]], i32 3)
; CHECK-NEXT:    ret <2 x i64> [[VLD1]]
;
  %vld1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0(ptr %ptr, i32 3)
  ret <2 x i64> %vld1
}

define <8 x i8> @vld1_8x8(ptr %ptr) {
; CHECK-LABEL: @vld1_8x8(
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[PTR:%.*]], align 1
; CHECK-NEXT:    ret <8 x i8> [[TMP2]]
;
  %vld1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0(ptr %ptr, i32 1)
  ret <8 x i8> %vld1
}

define <4 x i16> @vld1_4x16(ptr %ptr) {
; CHECK-LABEL: @vld1_4x16(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[PTR:%.*]], align 2
; CHECK-NEXT:    ret <4 x i16> [[TMP2]]
;
  %vld1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0(ptr %ptr, i32 2)
  ret <4 x i16> %vld1
}

define <2 x i32> @vld1_2x32(ptr %ptr) {
; CHECK-LABEL: @vld1_2x32(
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
;
  %vld1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32.p0(ptr %ptr, i32 4)
  ret <2 x i32> %vld1
}

define <1 x i64> @vld1_1x64(ptr %ptr) {
; CHECK-LABEL: @vld1_1x64(
; CHECK-NEXT:    [[TMP2:%.*]] = load <1 x i64>, ptr [[PTR:%.*]], align 8
; CHECK-NEXT:    ret <1 x i64> [[TMP2]]
;
  %vld1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0(ptr %ptr, i32 8)
  ret <1 x i64> %vld1
}

define <8 x i16> @vld1_8x16(ptr %ptr) {
; CHECK-LABEL: @vld1_8x16(
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
;
  %vld1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr %ptr, i32 2)
  ret <8 x i16> %vld1
}

define <16 x i8> @vld1_16x8(ptr %ptr) {
; CHECK-LABEL: @vld1_16x8(
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[PTR:%.*]], align 1
; CHECK-NEXT:    ret <16 x i8> [[TMP2]]
;
  %vld1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0(ptr %ptr, i32 1)
  ret <16 x i8> %vld1
}

define <4 x i32> @vld1_4x32(ptr %ptr) {
; CHECK-LABEL: @vld1_4x32(
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %vld1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0(ptr %ptr, i32 4)
  ret <4 x i32> %vld1
}

define <2 x i64> @vld1_2x64(ptr %ptr) {
; CHECK-LABEL: @vld1_2x64(
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
;
  %vld1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0(ptr %ptr, i32 8)
  ret <2 x i64> %vld1
}

declare <8 x i8> @llvm.arm.neon.vld1.v8i8.p0(ptr, i32)
declare <4 x i16> @llvm.arm.neon.vld1.v4i16.p0(ptr, i32)
declare <2 x i32> @llvm.arm.neon.vld1.v2i32.p0(ptr, i32)
declare <1 x i64> @llvm.arm.neon.vld1.v1i64.p0(ptr, i32)
declare <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr, i32)
declare <16 x i8> @llvm.arm.neon.vld1.v16i8.p0(ptr, i32)
declare <4 x i32> @llvm.arm.neon.vld1.v4i32.p0(ptr, i32)
declare <2 x i64> @llvm.arm.neon.vld1.v2i64.p0(ptr, i32)
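
; Illustrative sketch (not autogenerated) of the constant-folding benefit
; described in the header comment: the hypothetical global @g_const and
; function @vld1_from_constant below are assumptions, not part of the
; original test. Once the vld1 call is rewritten into a plain load, the load
; from the constant global is expected to constant-fold. CHECK lines are
; deliberately omitted here; they would be regenerated by
; utils/update_test_checks.py.

@g_const = internal constant <2 x i64> <i64 1, i64 2>, align 8

define <2 x i64> @vld1_from_constant() {
  ; Assumed outcome after instcombine: ret <2 x i64> <i64 1, i64 2>.
  %vld1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0(ptr @g_const, i32 8)
  ret <2 x i64> %vld1
}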