; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -relocation-model=pic -verify-machineinstrs -mcpu=pwr8 -ppc-vsr-nums-as-vr \
; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -O3 < %s | FileCheck %s

; RUN: llc -relocation-model=pic -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -O3 \
; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs \
; RUN: < %s | FileCheck %s --check-prefix=CHECK-P9 \
; RUN: --implicit-check-not xxswapd

; RUN: llc -relocation-model=pic -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -O3 \
; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs \
; RUN: -mattr=-power9-vector < %s | FileCheck %s --check-prefix=CHECK-P9-NOVECTOR

; These tests verify that VSX swap optimization works when loading a scalar
; into a vector register.
;
; Three llc configurations are exercised, all for powerpc64le at -O3 with PIC:
;   * -mcpu=pwr8, matched with the default CHECK prefix;
;   * -mcpu=pwr9, matched with the CHECK-P9 prefix; this invocation also
;     passes --implicit-check-not xxswapd, so any xxswapd in its output
;     fails the test;
;   * -mcpu=pwr9 with -mattr=-power9-vector, matched with the
;     CHECK-P9-NOVECTOR prefix.
;
; The assertion blocks below are autogenerated (see the NOTE on the first
; line); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.


; Test inputs: @x is the source vector, @y is the scalar that gets inserted,
; and @z receives the resulting vector.
@x = global <2 x double> <double 9.970000e+01, double -1.032220e+02>, align 16
@z = global <2 x double> <double 2.332000e+01, double 3.111111e+01>, align 16
@y = global double 1.780000e+00, align 8

; bar0: load the vector @x and the scalar @y, insert the scalar at element 0,
; and store the result to @z.
; Expected code: the pwr8 and pwr9/-power9-vector outputs load with lxvd2x,
; store with stxvd2x, and contain one xxswapd after the vector load and one
; before the store, around an xxmrghd. The pwr9 output uses lxv/stxv around
; the xxmrghd with no xxswapd.
define void @bar0() {
; CHECK-LABEL: bar0:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
; CHECK-NEXT: lxvd2x vs0, 0, r3
; CHECK-NEXT: addis r3, r2, .LC1@toc@ha
; CHECK-NEXT: ld r3, .LC1@toc@l(r3)
; CHECK-NEXT: lfd f1, 0(r3)
; CHECK-NEXT: addis r3, r2, .LC2@toc@ha
; CHECK-NEXT: ld r3, .LC2@toc@l(r3)
; CHECK-NEXT: xxswapd vs0, vs0
; CHECK-NEXT: xxmrghd vs0, vs0, vs1
; CHECK-NEXT: xxswapd vs0, vs0
; CHECK-NEXT: stxvd2x vs0, 0, r3
; CHECK-NEXT: blr
;
; CHECK-P9-LABEL: bar0:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: addis r3, r2, .LC0@toc@ha
; CHECK-P9-NEXT: ld r3, .LC0@toc@l(r3)
; CHECK-P9-NEXT: lxv vs0, 0(r3)
; CHECK-P9-NEXT: addis r3, r2, .LC1@toc@ha
; CHECK-P9-NEXT: ld r3, .LC1@toc@l(r3)
; CHECK-P9-NEXT: lfd f1, 0(r3)
; CHECK-P9-NEXT: addis r3, r2, .LC2@toc@ha
; CHECK-P9-NEXT: ld r3, .LC2@toc@l(r3)
; CHECK-P9-NEXT: xxmrghd vs0, vs0, vs1
; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-P9-NOVECTOR-LABEL: bar0:
; CHECK-P9-NOVECTOR: # %bb.0: # %entry
; CHECK-P9-NOVECTOR-NEXT: addis r3, r2, .LC0@toc@ha
; CHECK-P9-NOVECTOR-NEXT: ld r3, .LC0@toc@l(r3)
; CHECK-P9-NOVECTOR-NEXT: lxvd2x vs0, 0, r3
; CHECK-P9-NOVECTOR-NEXT: addis r3, r2, .LC1@toc@ha
; CHECK-P9-NOVECTOR-NEXT: ld r3, .LC1@toc@l(r3)
; CHECK-P9-NOVECTOR-NEXT: xxswapd vs0, vs0
; CHECK-P9-NOVECTOR-NEXT: lfd f1, 0(r3)
; CHECK-P9-NOVECTOR-NEXT: addis r3, r2, .LC2@toc@ha
; CHECK-P9-NOVECTOR-NEXT: ld r3, .LC2@toc@l(r3)
; CHECK-P9-NOVECTOR-NEXT: xxmrghd vs0, vs0, vs1
; CHECK-P9-NOVECTOR-NEXT: xxswapd vs0, vs0
; CHECK-P9-NOVECTOR-NEXT: stxvd2x vs0, 0, r3
; CHECK-P9-NOVECTOR-NEXT: blr
entry:
  %0 = load <2 x double>, ptr @x, align 16
  %1 = load double, ptr @y, align 8
  %vecins = insertelement <2 x double> %0, double %1, i32 0
  store <2 x double> %vecins, ptr @z, align 16
  ret void
}

; bar1: same shape as bar0, but the scalar loaded from @y is inserted at
; element 1 of the vector loaded from @x before the store to @z.
; Expected code: identical to bar0 except that the merge instruction is
; "xxpermdi vs0, vs1, vs0, 1" instead of xxmrghd; the pwr9 output again
; contains no xxswapd.
define void @bar1() {
; CHECK-LABEL: bar1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
; CHECK-NEXT: lxvd2x vs0, 0, r3
; CHECK-NEXT: addis r3, r2, .LC1@toc@ha
; CHECK-NEXT: ld r3, .LC1@toc@l(r3)
; CHECK-NEXT: lfd f1, 0(r3)
; CHECK-NEXT: addis r3, r2, .LC2@toc@ha
; CHECK-NEXT: ld r3, .LC2@toc@l(r3)
; CHECK-NEXT: xxswapd vs0, vs0
; CHECK-NEXT: xxpermdi vs0, vs1, vs0, 1
; CHECK-NEXT: xxswapd vs0, vs0
; CHECK-NEXT: stxvd2x vs0, 0, r3
; CHECK-NEXT: blr
;
; CHECK-P9-LABEL: bar1:
; CHECK-P9: # %bb.0: # %entry
; CHECK-P9-NEXT: addis r3, r2, .LC0@toc@ha
; CHECK-P9-NEXT: ld r3, .LC0@toc@l(r3)
; CHECK-P9-NEXT: lxv vs0, 0(r3)
; CHECK-P9-NEXT: addis r3, r2, .LC1@toc@ha
; CHECK-P9-NEXT: ld r3, .LC1@toc@l(r3)
; CHECK-P9-NEXT: lfd f1, 0(r3)
; CHECK-P9-NEXT: addis r3, r2, .LC2@toc@ha
; CHECK-P9-NEXT: ld r3, .LC2@toc@l(r3)
; CHECK-P9-NEXT: xxpermdi vs0, vs1, vs0, 1
; CHECK-P9-NEXT: stxv vs0, 0(r3)
; CHECK-P9-NEXT: blr
;
; CHECK-P9-NOVECTOR-LABEL: bar1:
; CHECK-P9-NOVECTOR: # %bb.0: # %entry
; CHECK-P9-NOVECTOR-NEXT: addis r3, r2, .LC0@toc@ha
; CHECK-P9-NOVECTOR-NEXT: ld r3, .LC0@toc@l(r3)
; CHECK-P9-NOVECTOR-NEXT: lxvd2x vs0, 0, r3
; CHECK-P9-NOVECTOR-NEXT: addis r3, r2, .LC1@toc@ha
; CHECK-P9-NOVECTOR-NEXT: ld r3, .LC1@toc@l(r3)
; CHECK-P9-NOVECTOR-NEXT: xxswapd vs0, vs0
; CHECK-P9-NOVECTOR-NEXT: lfd f1, 0(r3)
; CHECK-P9-NOVECTOR-NEXT: addis r3, r2, .LC2@toc@ha
; CHECK-P9-NOVECTOR-NEXT: ld r3, .LC2@toc@l(r3)
; CHECK-P9-NOVECTOR-NEXT: xxpermdi vs0, vs1, vs0, 1
; CHECK-P9-NOVECTOR-NEXT: xxswapd vs0, vs0
; CHECK-P9-NOVECTOR-NEXT: stxvd2x vs0, 0, r3
; CHECK-P9-NOVECTOR-NEXT: blr
entry:
  %0 = load <2 x double>, ptr @x, align 16
  %1 = load double, ptr @y, align 8
  %vecins = insertelement <2 x double> %0, double %1, i32 1
  store <2 x double> %vecins, ptr @z, align 16
  ret void
}