xref: /llvm-project/llvm/test/Transforms/PhaseOrdering/ARM/mve-floatreduce.ll (revision d1e5e6735a845f1281f11389da1e5a55a0d2e87a)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -O3 -S                   | FileCheck %s
3; RUN: opt < %s -passes="default<O3>" -S | FileCheck %s
4
; Target configuration for every function in this test: a little-endian layout
; with 32-bit pointers, compiled for a Thumb v8.1-M Mainline triple.
5target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
6target triple = "thumbv8.1m.main-none-none-eabi"
7
; Sums all eight half-precision lanes of %in and returns the scalar result.
; The sum is built from two vector "swap neighbours and add" steps followed by
; one scalar add. The function uses attribute group #0. The autogenerated
; assertions below expect this sequence to come out of the pipeline essentially
; intact: the only visible differences are poison in the unused lanes of the
; second shuffle mask and i64 extractelement indices.
8define arm_aapcs_vfpcc half @vecAddAcrossF16Mve(<8 x half> %in) #0 {
9; CHECK-LABEL: @vecAddAcrossF16Mve(
10; CHECK-NEXT:  entry:
11; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x half> [[IN:%.*]], <8 x half> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
12; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast <8 x half> [[TMP0]], [[IN]]
13; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x half> [[TMP1]] to <4 x i32>
14; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 3, i32 poison>
15; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <8 x half>
16; CHECK-NEXT:    [[TMP5:%.*]] = fadd fast <8 x half> [[TMP1]], [[TMP4]]
17; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <8 x half> [[TMP5]], i64 0
18; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x half> [[TMP5]], i64 4
19; CHECK-NEXT:    [[ADD:%.*]] = fadd fast half [[TMP6]], [[TMP7]]
20; CHECK-NEXT:    ret half [[ADD]]
21;
22entry:
; Step 1: swap each adjacent pair of half lanes and add, so every lane holds
; the sum of its pair (lane 0 = in[0] + in[1], lane 2 = in[2] + in[3], ...).
23  %0 = shufflevector <8 x half> %in, <8 x half> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
24  %1 = fadd fast <8 x half> %0, %in
; Step 2: view the vector as four i32 lanes (two halves each) and swap adjacent
; i32 lanes, which exchanges neighbouring half pairs. After the add, lane 0
; holds in[0..3] summed and lane 4 holds in[4..7] summed.
25  %2 = bitcast <8 x half> %1 to <4 x i32>
26  %3 = shufflevector <4 x i32> %2, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
27  %4 = bitcast <4 x i32> %3 to <8 x half>
28  %5 = fadd fast <8 x half> %1, %4
; Step 3: combine the two four-lane partial sums into the final scalar.
29  %6 = extractelement <8 x half> %5, i32 0
30  %7 = extractelement <8 x half> %5, i32 4
31  %add = fadd fast half %6, %7
32  ret half %add
33}
34
; Sums the four float lanes of %in with a left-to-right chain of scalar
; `fadd fast` operations and returns the result. Unlike the F16 function above,
; this definition does not reference attribute group #0. The autogenerated
; assertions below expect the scalar extract/add chain to be kept as written,
; with only the extractelement indices widened to i64.
35define arm_aapcs_vfpcc float @vecAddAcrossF32Mve(<4 x float> %in) {
36; CHECK-LABEL: @vecAddAcrossF32Mve(
37; CHECK-NEXT:  entry:
38; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[IN:%.*]], i64 0
39; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[IN]], i64 1
40; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP0]], [[TMP1]]
41; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[IN]], i64 2
42; CHECK-NEXT:    [[ADD1:%.*]] = fadd fast float [[ADD]], [[TMP2]]
43; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[IN]], i64 3
44; CHECK-NEXT:    [[ADD2:%.*]] = fadd fast float [[ADD1]], [[TMP3]]
45; CHECK-NEXT:    ret float [[ADD2]]
46;
47entry:
; Running sum: ((in[0] + in[1]) + in[2]) + in[3].
48  %0 = extractelement <4 x float> %in, i32 0
49  %1 = extractelement <4 x float> %in, i32 1
50  %add = fadd fast float %0, %1
51  %2 = extractelement <4 x float> %in, i32 2
52  %add1 = fadd fast float %add, %2
53  %3 = extractelement <4 x float> %in, i32 3
54  %add2 = fadd fast float %add1, %3
55  ret float %add2
56}
57
; Attribute group #0 sets the "+mve.fp" target feature. Among the functions
; visible in this file, only @vecAddAcrossF16Mve references it.
58attributes #0 = { "target-features"="+mve.fp" }
59