; RUN: llc < %s -mcpu=cortex-a53 -enable-post-misched=false -enable-aa-sched-mi | FileCheck %s

; Scheduling test for the NEON structured load/store intrinsics.
;
; @func performs two "ld2 -> multiply/add -> st2" rounds. Every ld2 reads from
; field 2 of %Struct and every st2 writes to field 3, so the first st2 does not
; alias the second ld2. With alias-aware machine scheduling enabled, the store
; may therefore be moved below that load, which lets the fmla instructions be
; issued next to each other as spelled out in the expected sequence below.


; CHECK: fmla
; CHECK-NEXT: mov
; CHECK-NEXT: mov
; CHECK-NEXT: fmla
; CHECK-NEXT: fmla
; CHECK-NEXT: fmla
target datalayout = "e-m:e-i64:64-i128:128-n8:16:32:64-S128"
target triple = "aarch64--linux-gnu"

; Field 2 and field 3 are two separate arrays of sixteen {float, float} pairs.
%Struct = type { ptr, [9 x double], [16 x {float, float}], [16 x {float, float}], i32, i32 }

; Attribute group #0: nounwind plus the string attributes listed at the end of
; the file.
define linkonce_odr void @func(ptr nocapture %this, <4 x float> %f) unnamed_addr #0 align 2 {
entry:
  ; Round 1: load from field 2 starting at element 8.
  %src.lo = getelementptr %Struct, ptr %this, i64 0, i32 2, i64 8, i32 0
  %pair.lo = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr %src.lo)
  ; Each half of the loaded pair is turned into %f + %f * half.
  %lo.v1 = extractvalue { <4 x float>, <4 x float> } %pair.lo, 1
  %lo.mul1 = fmul <4 x float> %f, %lo.v1
  %lo.acc1 = fadd <4 x float> %f, %lo.mul1
  %lo.v0 = extractvalue { <4 x float>, <4 x float> } %pair.lo, 0
  %lo.mul0 = fmul <4 x float> %f, %lo.v0
  %lo.acc0 = fadd <4 x float> %f, %lo.mul0
  ; Round 1 result goes to field 3, element 8.
  %dst.lo = getelementptr %Struct, ptr %this, i64 0, i32 3, i64 8, i32 0
  tail call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> %lo.acc0, <4 x float> %lo.acc1, ptr %dst.lo)
  ; Round 2: same computation on field 2 starting at element 12. This load
  ; follows the store above in program order.
  %src.hi = getelementptr %Struct, ptr %this, i64 0, i32 2, i64 12, i32 0
  %pair.hi = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr %src.hi)
  %hi.v1 = extractvalue { <4 x float>, <4 x float> } %pair.hi, 1
  %hi.mul1 = fmul <4 x float> %f, %hi.v1
  %hi.acc1 = fadd <4 x float> %f, %hi.mul1
  %hi.v0 = extractvalue { <4 x float>, <4 x float> } %pair.hi, 0
  %hi.mul0 = fmul <4 x float> %f, %hi.v0
  %hi.acc0 = fadd <4 x float> %f, %hi.mul0
  ; Round 2 result goes to field 3, element 12.
  %dst.hi = getelementptr %Struct, ptr %this, i64 0, i32 3, i64 12, i32 0
  tail call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> %hi.acc0, <4 x float> %hi.acc1, ptr %dst.hi)
  ret void
}

; Two-register structured load; attribute group #2 (nounwind readonly).
declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr) #2

; Two-register structured store; attribute group #1 (nounwind).
declare void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float>, <4 x float>, ptr nocapture) #1

attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #1 = { nounwind }
attributes #2 = { nounwind readonly }