; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -mattr=+aes -o - %s| FileCheck %s --check-prefixes=CHECK

; Two operands are in scalar form.
; Tests that both operands are loaded into SIMD registers directly as opposed to being loaded into GPR followed by a fmov.
define void @test1(ptr %0, i64 %1, i64 %2) {
; CHECK-LABEL: test1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add x8, x0, x1, lsl #4
; CHECK-NEXT:    add x9, x0, x2, lsl #4
; CHECK-NEXT:    ldr d0, [x9, #8]
; CHECK-NEXT:    ldr d1, [x8, #8]
; CHECK-NEXT:    pmull v0.1q, v1.1d, v0.1d
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %4 = getelementptr inbounds <2 x i64>, ptr %0, i64 %1
  %5 = getelementptr inbounds <2 x i64>, ptr %0, i64 %1, i64 1
  %6 = load i64, ptr %5, align 8
  %7 = getelementptr inbounds <2 x i64>, ptr %0, i64 %2, i64 1
  %8 = load i64, ptr %7, align 8
  %9 = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %6, i64 %8)
  store <16 x i8> %9, ptr %0, align 16
  ret void
}

; Operand %8 is higher-half of v2i64, and operand %7 is a scalar load.
; Tests that operand is loaded into SIMD registers directly as opposed to being loaded into GPR followed by a fmov.
define void @test2(ptr %0, i64 %1, i64 %2, <2 x i64> %3) {
; CHECK-LABEL: test2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add x8, x0, x1, lsl #4
; CHECK-NEXT:    add x9, x8, #8
; CHECK-NEXT:    ld1r { v1.2d }, [x9]
; CHECK-NEXT:    pmull2 v0.1q, v0.2d, v1.2d
; CHECK-NEXT:    str q0, [x8]
; CHECK-NEXT:    ret
  %5 = getelementptr inbounds <2 x i64>, ptr %0, i64 %1
  %6 = getelementptr inbounds <2 x i64>, ptr %0, i64 %1, i64 1
  %7 = load i64, ptr %6, align 8
  %8 = extractelement <2 x i64> %3, i64 1
  %9 = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %8, i64 %7)
  store <16 x i8> %9, ptr %5, align 16
  ret void
}

; Operand %7 is a scalar load, and operand %3 is an input parameter of function `test3`.
; Test that %7 is loaded into SIMD registers.
define void @test3(ptr %0, i64 %1, i64 %2, i64 %3) {
; CHECK-LABEL: test3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add x8, x0, x1, lsl #4
; CHECK-NEXT:    fmov d1, x3
; CHECK-NEXT:    ldr d0, [x8, #8]
; CHECK-NEXT:    pmull v0.1q, v0.1d, v1.1d
; CHECK-NEXT:    str q0, [x8]
; CHECK-NEXT:    ret
  %5 = getelementptr inbounds <2 x i64>, ptr %0, i64 %1
  %6 = getelementptr inbounds <2 x i64>, ptr %0, i64 %1, i64 1
  %7 = load i64, ptr %6, align 8
  %8 = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %7, i64 %3)
  store <16 x i8> %8, ptr %5, align 16
  ret void
}

declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64)