; xref: /llvm-project/llvm/test/CodeGen/ARM/domain-conv-vmovs.ll (revision f799e3f9441c2a348af0357a61020cc1e397e66b)
; RUN: llc -verify-machineinstrs -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a9 -mattr=+neon,+neonfp -float-abi=hard < %s | FileCheck %s
; The line above compiles this file with llc (machine verifier enabled) for an
; ARMv7 Cortex-A9 hard-float target with +neon and +neonfp, and pipes the
; resulting assembly into FileCheck against the directives in this file.

; Inserts the scalar %arg into lane 0 of %in and adds the resulting vector to
; itself. The expected assembly performs the lane insertion with a pair of
; vext.32 instructions on d0/d1 and then a vadd.f32 reading d1.
; NOTE(review): presumably %arg arrives in s0 (lane 0 of d0) and %in in d1
; under the hard-float calling convention -- confirm against the AAPCS-VFP rules.
define <2 x float> @test_vmovs_via_vext_lane0to0(float %arg, <2 x float> %in) {
; CHECK-LABEL: test_vmovs_via_vext_lane0to0:
  %vec = insertelement <2 x float> %in, float %arg, i32 0
  %res = fadd <2 x float> %vec, %vec

; CHECK: vext.32 d1, d1, d0, #1
; CHECK: vext.32 d1, d1, d1, #1
; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1

  ret <2 x float> %res
}

; Inserts the scalar %arg into lane 1 of %in and adds the resulting vector to
; itself. The expected assembly performs the lane insertion with a pair of
; vext.32 instructions on d0/d1 and then a vadd.f32 reading d1.
; NOTE(review): presumably %arg arrives in s0 (lane 0 of d0) and %in in d1
; under the hard-float calling convention -- confirm against the AAPCS-VFP rules.
define <2 x float> @test_vmovs_via_vext_lane0to1(float %arg, <2 x float> %in) {
; CHECK-LABEL: test_vmovs_via_vext_lane0to1:
  %vec = insertelement <2 x float> %in, float %arg, i32 1
  %res = fadd <2 x float> %vec, %vec

; CHECK: vext.32 d1, d1, d1, #1
; CHECK: vext.32 d1, d1, d0, #1
; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1

  ret <2 x float> %res
}

; Inserts the scalar %arg into lane 0 of %in and adds the resulting vector to
; itself. The unnamed leading float parameter is unused by the body.
; The expected assembly performs the lane insertion with a pair of vext.32
; instructions on d0/d1 and then a vadd.f32 reading d1.
; NOTE(review): presumably the unnamed float occupies s0 so that %arg arrives
; in s1 (lane 1 of d0) -- confirm against the AAPCS-VFP rules.
define <2 x float> @test_vmovs_via_vext_lane1to0(float, float %arg, <2 x float> %in) {
; CHECK-LABEL: test_vmovs_via_vext_lane1to0:
  %vec = insertelement <2 x float> %in, float %arg, i32 0
  %res = fadd <2 x float> %vec, %vec

; CHECK: vext.32 d1, d1, d1, #1
; CHECK: vext.32 d1, d0, d1, #1
; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1

  ret <2 x float> %res
}

; Inserts the scalar %arg into lane 1 of %in and adds the resulting vector to
; itself. The unnamed leading float parameter is unused by the body.
; The expected assembly performs the lane insertion with a pair of vext.32
; instructions on d0/d1 and then a vadd.f32 reading d1.
; NOTE(review): presumably the unnamed float occupies s0 so that %arg arrives
; in s1 (lane 1 of d0) -- confirm against the AAPCS-VFP rules.
define <2 x float> @test_vmovs_via_vext_lane1to1(float, float %arg, <2 x float> %in) {
; CHECK-LABEL: test_vmovs_via_vext_lane1to1:
  %vec = insertelement <2 x float> %in, float %arg, i32 1
  %res = fadd <2 x float> %vec, %vec

; CHECK: vext.32 d1, d0, d1, #1
; CHECK: vext.32 d1, d1, d1, #1
; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1

  ret <2 x float> %res
}


; Returns %ret + %ret. The unnamed first parameter, %lhs and %rhs are not used
; by the body. The expected assembly contains a vdup.32 that broadcasts lane 1
; of d0 into d0, standing in for the scalar move described in the body comment.
define float @test_vmovs_via_vdup(float, float %ret, float %lhs, float %rhs) {
; CHECK-LABEL: test_vmovs_via_vdup:

  ; Do an operation (which will end up NEON because of +neonfp) to convince the
  ; execution-domain pass that NEON is a good thing to use.
  %res = fadd float %ret, %ret
  ;  It makes sense for LLVM to do the addition in d0 here, because it's going
  ;  to be returned. This means it will want a "vmov s0, s1":
; CHECK: vdup.32 d0, d0[1]

  ret float %res
}

; External declarations called by @test_ineligible: the float square-root
; intrinsic and an opaque void function.
declare float @llvm.sqrt.f32(float)

declare void @bar()

; This is primarily a compile-only test: it computes sqrt(%in) + sqrt(%in),
; calls @bar while that value is still needed, and then returns the value.
; The expected assembly contains the call to bar followed by two vext.32
; instructions.
define float @test_ineligible(float, float %in) {
; CHECK-LABEL: test_ineligible:

  %sqrt = call float @llvm.sqrt.f32(float %in)
  %val = fadd float %sqrt, %sqrt

  ; This call forces a move from a callee-saved register to the return-reg. That
  ; move is not eligible for conversion to a d-register instruction because the
  ; use-def chains would be messed up. Primarily a compile-test (we used to
  ; internal fault).
  call void @bar()
; CHECK: bl bar
; CHECK: vext.32
; CHECK: vext.32
  ret float %val
}

; Bitcasts the i32 argument to float, adds it to itself, and bitcasts the sum
; back to i32. The expected assembly uses vmov.32 with a d-register operand for
; both the GPR-to-FP move and the FP-to-GPR move.
define i32 @test_vmovs_no_sreg(i32 %in) {
; CHECK-LABEL: test_vmovs_no_sreg:

  ; Check that the movement to and from GPRs takes place in the NEON domain.
; CHECK: vmov.32 d
  %x = bitcast i32 %in to float

  %res = fadd float %x, %x

; CHECK: vmov.32 r{{[0-9]+}}, d
  %resi = bitcast float %res to i32

  ret i32 %resi
}


; The point of this test is:
;   + Make sure s1 is live before the BL
;   + Make sure s1 is clobbered by the BL
;   + Convince LLVM to emit a VMOV to S0
;   + Convince LLVM to domain-convert this.

; When all of those are satisfied, LLVM should *not* mark s1 as an implicit-use
; because it's dead.

; Opaque two-float callee used by @test_clobbers_recognised.
declare float @clobbers_s1(float, float)

; Calls @clobbers_s1 with %val passed as both arguments, inserts the returned
; scalar into lane 0 of %invec, and returns that vector added to itself.
; Only the function label is matched in the output; no specific instructions
; are checked.
; NOTE(review): presumably the real coverage comes from -verify-machineinstrs
; on the llc command line -- confirm.
define <2 x float> @test_clobbers_recognised(<2 x float> %invec, float %val) {
; CHECK-LABEL: test_clobbers_recognised:
  %elt = call float @clobbers_s1(float %val, float %val)

  %vec = insertelement <2 x float> %invec, float %elt, i32 0
  %res = fadd <2 x float> %vec, %vec
  ret <2 x float> %res
}