; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp,+fp64 -verify-machineinstrs %s -o - | FileCheck %s

; Every function in this file follows the same shape: load two vectors that
; sit back to back at %src, interleave them element by element with a
; shufflevector, store the double-width result to %dst, and return the address
; one double-width vector past %dst. The per-function assertions record the
; code expected for the store plus the pointer increment.

; i32

; Interleaves two <4 x i32> vectors (mask 0,4,1,5,2,6,3,7) into an <8 x i32>
; stored at %dst. The assertions expect a vst20.32/vst21.32 pair whose second
; instruction writes back to r1, with r1 then copied to r0 as the return value.
define ptr @vst2_v4i32(ptr %src, ptr %dst) {
; CHECK-LABEL: vst2_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0, #16]
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vst20.32 {q0, q1}, [r1]
; CHECK-NEXT:    vst21.32 {q0, q1}, [r1]!
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
entry:
  %l1 = load <4 x i32>, ptr %src, align 4
  ; Second input vector is the one immediately following the first at %src.
  %s2 = getelementptr <4 x i32>, ptr %src, i32 1
  %l2 = load <4 x i32>, ptr %s2, align 4
  ; Mask indices 0-3 select from %l1 and 4-7 from %l2, alternating the two.
  %s = shufflevector <4 x i32> %l1, <4 x i32> %l2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  store <8 x i32> %s, ptr %dst
  ; Returned pointer is %dst advanced by one whole <8 x i32>.
  %ret = getelementptr inbounds <8 x i32>, ptr %dst, i32 1
  ret ptr %ret
}

; i16

; Interleaves two <8 x i16> vectors into a <16 x i16> stored at %dst. The
; assertions expect a vst20.16/vst21.16 pair whose second instruction writes
; back to r1, with r1 then copied to r0 as the return value.
define ptr @vst2_v8i16(ptr %src, ptr %dst) {
; CHECK-LABEL: vst2_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0, #16]
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vst20.16 {q0, q1}, [r1]
; CHECK-NEXT:    vst21.16 {q0, q1}, [r1]!
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
entry:
  %l1 = load <8 x i16>, ptr %src, align 4
  ; Second input vector is the one immediately following the first at %src.
  %s2 = getelementptr <8 x i16>, ptr %src, i32 1
  %l2 = load <8 x i16>, ptr %s2, align 4
  ; Mask indices 0-7 select from %l1 and 8-15 from %l2, alternating the two.
  %s = shufflevector <8 x i16> %l1, <8 x i16> %l2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  store <16 x i16> %s, ptr %dst
  ; Returned pointer is %dst advanced by one whole <16 x i16>.
  %ret = getelementptr inbounds <16 x i16>, ptr %dst, i32 1
  ret ptr %ret
}

; i8

; Interleaves two <16 x i8> vectors into a <32 x i8> stored at %dst. The
; assertions expect a vst20.8/vst21.8 pair whose second instruction writes
; back to r1, with r1 then copied to r0 as the return value.
define ptr @vst2_v16i8(ptr %src, ptr %dst) {
; CHECK-LABEL: vst2_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0, #16]
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vst20.8 {q0, q1}, [r1]
; CHECK-NEXT:    vst21.8 {q0, q1}, [r1]!
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
entry:
  %l1 = load <16 x i8>, ptr %src, align 4
  ; Second input vector is the one immediately following the first at %src.
  %s2 = getelementptr <16 x i8>, ptr %src, i32 1
  %l2 = load <16 x i8>, ptr %s2, align 4
  ; Mask indices 0-15 select from %l1 and 16-31 from %l2, alternating the two.
  %s = shufflevector <16 x i8> %l1, <16 x i8> %l2, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  store <32 x i8> %s, ptr %dst
  ; Returned pointer is %dst advanced by one whole <32 x i8>.
  %ret = getelementptr inbounds <32 x i8>, ptr %dst, i32 1
  ret ptr %ret
}

; i64

; Interleaves two <2 x i64> vectors (mask 0,2,1,3) into a <4 x i64> stored at
; %dst. Unlike the narrower element types, the assertions here show no
; vst20/vst21: the expected code shuffles with vmov.f64 and issues two vstrw.32
; stores, the second one post-incrementing r1 by 32 before it is returned.
define ptr @vst2_v2i64(ptr %src, ptr %dst) {
; CHECK-LABEL: vst2_v2i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0, #16]
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vmov.f64 d5, d0
; CHECK-NEXT:    vmov.f64 d0, d3
; CHECK-NEXT:    vmov.f64 d4, d2
; CHECK-NEXT:    vstrw.32 q0, [r1, #16]
; CHECK-NEXT:    vstrw.32 q2, [r1], #32
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
entry:
  %l1 = load <2 x i64>, ptr %src, align 4
  ; Second input vector is the one immediately following the first at %src.
  %s2 = getelementptr <2 x i64>, ptr %src, i32 1
  %l2 = load <2 x i64>, ptr %s2, align 4
  ; Mask indices 0-1 select from %l1 and 2-3 from %l2, alternating the two.
  %s = shufflevector <2 x i64> %l1, <2 x i64> %l2, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  store <4 x i64> %s, ptr %dst
  ; Returned pointer is %dst advanced by one whole <4 x i64>.
  %ret = getelementptr inbounds <4 x i64>, ptr %dst, i32 1
  ret ptr %ret
}

; f32

; Floating-point counterpart of the <4 x i32> case: interleaves two
; <4 x float> vectors into an <8 x float> stored at %dst. The assertions
; expect the same vst20.32/vst21.32 sequence with writeback to r1.
define ptr @vst2_v4f32(ptr %src, ptr %dst) {
; CHECK-LABEL: vst2_v4f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0, #16]
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vst20.32 {q0, q1}, [r1]
; CHECK-NEXT:    vst21.32 {q0, q1}, [r1]!
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
entry:
  %l1 = load <4 x float>, ptr %src, align 4
  ; Second input vector is the one immediately following the first at %src.
  %s2 = getelementptr <4 x float>, ptr %src, i32 1
  %l2 = load <4 x float>, ptr %s2, align 4
  ; Mask indices 0-3 select from %l1 and 4-7 from %l2, alternating the two.
  %s = shufflevector <4 x float> %l1, <4 x float> %l2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  store <8 x float> %s, ptr %dst
  ; Returned pointer is %dst advanced by one whole <8 x float>.
  %ret = getelementptr inbounds <8 x float>, ptr %dst, i32 1
  ret ptr %ret
}

; f16

; Floating-point counterpart of the <8 x i16> case: interleaves two
; <8 x half> vectors into a <16 x half> stored at %dst. The assertions expect
; the same vst20.16/vst21.16 sequence with writeback to r1.
define ptr @vst2_v8f16(ptr %src, ptr %dst) {
; CHECK-LABEL: vst2_v8f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0, #16]
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    vst20.16 {q0, q1}, [r1]
; CHECK-NEXT:    vst21.16 {q0, q1}, [r1]!
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
entry:
  %l1 = load <8 x half>, ptr %src, align 4
  ; Second input vector is the one immediately following the first at %src.
  %s2 = getelementptr <8 x half>, ptr %src, i32 1
  %l2 = load <8 x half>, ptr %s2, align 4
  ; Mask indices 0-7 select from %l1 and 8-15 from %l2, alternating the two.
  %s = shufflevector <8 x half> %l1, <8 x half> %l2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  store <16 x half> %s, ptr %dst
  ; Returned pointer is %dst advanced by one whole <16 x half>.
  %ret = getelementptr inbounds <16 x half>, ptr %dst, i32 1
  ret ptr %ret
}

; f64

; Floating-point counterpart of the <2 x i64> case: interleaves two
; <2 x double> vectors into a <4 x double> stored at %dst. As with i64, the
; assertions show vmov.f64 moves and two vstrw.32 stores (the second
; post-incrementing r1 by 32) rather than a vst20/vst21 pair.
define ptr @vst2_v2f64(ptr %src, ptr %dst) {
; CHECK-LABEL: vst2_v2f64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0, #16]
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vmov.f64 d5, d0
; CHECK-NEXT:    vmov.f64 d0, d3
; CHECK-NEXT:    vmov.f64 d4, d2
; CHECK-NEXT:    vstrw.32 q0, [r1, #16]
; CHECK-NEXT:    vstrw.32 q2, [r1], #32
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
entry:
  %l1 = load <2 x double>, ptr %src, align 4
  ; Second input vector is the one immediately following the first at %src.
  %s2 = getelementptr <2 x double>, ptr %src, i32 1
  %l2 = load <2 x double>, ptr %s2, align 4
  ; Mask indices 0-1 select from %l1 and 2-3 from %l2, alternating the two.
  %s = shufflevector <2 x double> %l1, <2 x double> %l2, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  store <4 x double> %s, ptr %dst
  ; Returned pointer is %dst advanced by one whole <4 x double>.
  %ret = getelementptr inbounds <4 x double>, ptr %dst, i32 1
  ret ptr %ret
}