xref: /llvm-project/llvm/test/CodeGen/AArch64/pmull-ldr-merge.ll (revision db158c7c830807caeeb0691739c41f1d522029e9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs  -mtriple=aarch64-linux-gnu -mattr=+aes -o - %s| FileCheck %s --check-prefixes=CHECK
3
4; Two operands are in scalar form.
5; Tests that both operands are loaded into SIMD registers directly as opposed to being loaded into GPR followed by a fmov.
6define void @test1(ptr %0, i64 %1, i64 %2) {
7; CHECK-LABEL: test1:
8; CHECK:       // %bb.0:
9; CHECK-NEXT:    add x8, x0, x1, lsl #4
10; CHECK-NEXT:    add x9, x0, x2, lsl #4
11; CHECK-NEXT:    ldr d0, [x9, #8]
12; CHECK-NEXT:    ldr d1, [x8, #8]
13; CHECK-NEXT:    pmull v0.1q, v1.1d, v0.1d
14; CHECK-NEXT:    str q0, [x0]
15; CHECK-NEXT:    ret
; %4 is computed but not used by any later instruction in this function.
; %6 and %8 are the i64 values at element 1 of the <2 x i64> slots %1 and %2
; of %0; the expected output loads each one with a single `ldr d<n>, [<base>, #8]`.
; The pmull64 result is stored to %0 itself (`str q0, [x0]`), not to slot %1.
16  %4 = getelementptr inbounds <2 x i64>, ptr %0, i64 %1
17  %5 = getelementptr inbounds <2 x i64>, ptr %0, i64 %1, i64 1
18  %6 = load i64, ptr %5, align 8
19  %7 = getelementptr inbounds <2 x i64>, ptr %0, i64 %2, i64 1
20  %8 = load i64, ptr %7, align 8
21  %9 = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %6, i64 %8)
22  store <16 x i8> %9, ptr %0, align 16
23  ret void
24}
25
26; Operand %8 is higher-half of v2i64, and operand %7 is a scalar load.
27; Tests that the scalar operand %7 is loaded into a SIMD register directly, as opposed to being loaded into a GPR followed by an fmov.
28define void @test2(ptr %0, i64 %1, i64 %2, <2 x i64> %3) {
29; CHECK-LABEL: test2:
30; CHECK:       // %bb.0:
31; CHECK-NEXT:    add x8, x0, x1, lsl #4
32; CHECK-NEXT:    add x9, x8, #8
33; CHECK-NEXT:    ld1r { v1.2d }, [x9]
34; CHECK-NEXT:    pmull2 v0.1q, v0.2d, v1.2d
35; CHECK-NEXT:    str q0, [x8]
36; CHECK-NEXT:    ret
; The i64 argument %2 is not referenced in the body.
; %8 is element 1 of the <2 x i64> argument %3; %7 is the i64 at element 1 of
; slot %1 of %0. The expected output loads %7 with `ld1r` (replicating it into
; both lanes) and multiplies with `pmull2`.
; The result is stored to slot %1 of %0 (pointer %5).
37  %5 = getelementptr inbounds <2 x i64>, ptr %0, i64 %1
38  %6 = getelementptr inbounds <2 x i64>, ptr %0, i64 %1, i64 1
39  %7 = load i64, ptr %6, align 8
40  %8 = extractelement <2 x i64> %3, i64 1
41  %9 = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %8, i64 %7)
42  store <16 x i8> %9, ptr %5, align 16
43  ret void
44}
45
46; Operand %7 is a scalar load, and operand %3 is an input parameter of function `test3`.
47; Test that %7 is loaded into SIMD registers.
48define void @test3(ptr %0, i64 %1, i64 %2, i64 %3) {
49; CHECK-LABEL: test3:
50; CHECK:       // %bb.0:
51; CHECK-NEXT:    add x8, x0, x1, lsl #4
52; CHECK-NEXT:    fmov d1, x3
53; CHECK-NEXT:    ldr d0, [x8, #8]
54; CHECK-NEXT:    pmull v0.1q, v0.1d, v1.1d
55; CHECK-NEXT:    str q0, [x8]
56; CHECK-NEXT:    ret
; The i64 argument %2 is not referenced in the body.
; %7 is the i64 at element 1 of slot %1 of %0; the expected output loads it
; with `ldr d0, [x8, #8]`. The other operand is the i64 argument %3, which the
; expected output moves from x3 into a SIMD register with `fmov d1, x3`.
; The result is stored to slot %1 of %0 (pointer %5).
57  %5 = getelementptr inbounds <2 x i64>, ptr %0, i64 %1
58  %6 = getelementptr inbounds <2 x i64>, ptr %0, i64 %1, i64 1
59  %7 = load i64, ptr %6, align 8
60  %8 = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %7, i64 %3)
61  store <16 x i8> %8, ptr %5, align 16
62  ret void
63}
64
; Intrinsic called by test1, test2 and test3: takes two i64 operands and returns <16 x i8>.
65declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64)
66