; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-LE
; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-BE

; Code generation checks for 128-bit MVE vector loads and stores at several
; alignments, with immediate pointer offsets, and on stack slots. Every function
; is compiled twice: once for the little-endian triple and once for the
; big-endian triple, each with its own set of expected instructions.

; Load <4 x i32> with 4-byte alignment, then shift every lane right by one.
define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a4(ptr %vp) {
; CHECK-LE-LABEL: load_4xi32_a4:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrw.u32 q0, [r0]
; CHECK-LE-NEXT:    vshr.u32 q0, q0, #1
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: load_4xi32_a4:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrw.u32 q0, [r0]
; CHECK-BE-NEXT:    vshr.u32 q1, q0, #1
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, ptr %vp, align 4
  %1 = lshr <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %1
}

; Same as above but the load is only 2-byte aligned.
define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a2(ptr %vp) {
; CHECK-LE-LABEL: load_4xi32_a2:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrh.u16 q0, [r0]
; CHECK-LE-NEXT:    vshr.u32 q0, q0, #1
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: load_4xi32_a2:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrb.u8 q0, [r0]
; CHECK-BE-NEXT:    vrev32.8 q0, q0
; CHECK-BE-NEXT:    vshr.u32 q1, q0, #1
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, ptr %vp, align 2
  %1 = lshr <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %1
}

; Same as above but the load is only 1-byte aligned.
define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a1(ptr %vp) {
; CHECK-LE-LABEL: load_4xi32_a1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrb.u8 q0, [r0]
; CHECK-LE-NEXT:    vshr.u32 q0, q0, #1
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: load_4xi32_a1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldrb.u8 q0, [r0]
; CHECK-BE-NEXT:    vrev32.8 q0, q0
; CHECK-BE-NEXT:    vshr.u32 q1, q0, #1
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, ptr %vp, align 1
  %1 = lshr <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %1
}

; Shift every lane of %val right by one, then store with 4-byte alignment.
define arm_aapcs_vfpcc void @store_4xi32_a4(ptr %vp, <4 x i32> %val) {
; CHECK-LE-LABEL: store_4xi32_a4:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vshr.u32 q0, q0, #1
; CHECK-LE-NEXT:    vstrw.32 q0, [r0]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: store_4xi32_a4:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vshr.u32 q0, q1, #1
; CHECK-BE-NEXT:    vstrw.32 q0, [r0]
; CHECK-BE-NEXT:    bx lr
entry:
  %0 = lshr <4 x i32> %val, <i32 1, i32 1, i32 1, i32 1>
  store <4 x i32> %0, ptr %vp, align 4
  ret void
}

; Same as above but the store is only 2-byte aligned.
define arm_aapcs_vfpcc void @store_4xi32_a2(ptr %vp, <4 x i32> %val) {
; CHECK-LE-LABEL: store_4xi32_a2:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vshr.u32 q0, q0, #1
; CHECK-LE-NEXT:    vstrh.16 q0, [r0]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: store_4xi32_a2:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vshr.u32 q0, q1, #1
; CHECK-BE-NEXT:    vrev32.8 q0, q0
; CHECK-BE-NEXT:    vstrb.8 q0, [r0]
; CHECK-BE-NEXT:    bx lr
entry:
  %0 = lshr <4 x i32> %val, <i32 1, i32 1, i32 1, i32 1>
  store <4 x i32> %0, ptr %vp, align 2
  ret void
}

; Same as above but the store is only 1-byte aligned.
define arm_aapcs_vfpcc void @store_4xi32_a1(ptr %vp, <4 x i32> %val) {
; CHECK-LE-LABEL: store_4xi32_a1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vshr.u32 q0, q0, #1
; CHECK-LE-NEXT:    vstrb.8 q0, [r0]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: store_4xi32_a1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vshr.u32 q0, q1, #1
; CHECK-BE-NEXT:    vrev32.8 q0, q0
; CHECK-BE-NEXT:    vstrb.8 q0, [r0]
; CHECK-BE-NEXT:    bx lr
entry:
  %0 = lshr <4 x i32> %val, <i32 1, i32 1, i32 1, i32 1>
  store <4 x i32> %0, ptr %vp, align 1
  ret void
}

; Load from %ip plus 127 i32 elements, i.e. a byte offset of +508.
define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a4_offset_pos(ptr %ip) {
; CHECK-LE-LABEL: load_4xi32_a4_offset_pos:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrw.u32 q0, [r0, #508]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: load_4xi32_a4_offset_pos:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    add.w r0, r0, #508
; CHECK-BE-NEXT:    vldrb.u8 q1, [r0]
; CHECK-BE-NEXT:    vrev64.8 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %ipoffset = getelementptr inbounds i32, ptr %ip, i32 127
  %0 = load <4 x i32>, ptr %ipoffset, align 4
  ret <4 x i32> %0
}

; Load from %ip minus 127 i32 elements, i.e. a byte offset of -508.
define arm_aapcs_vfpcc <4 x i32> @load_4xi32_a4_offset_neg(ptr %ip) {
; CHECK-LE-LABEL: load_4xi32_a4_offset_neg:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldrw.u32 q0, [r0, #-508]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: load_4xi32_a4_offset_neg:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    sub.w r0, r0, #508
; CHECK-BE-NEXT:    vldrb.u8 q1, [r0]
; CHECK-BE-NEXT:    vrev64.8 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %ipoffset = getelementptr inbounds i32, ptr %ip, i32 -127
  %0 = load <4 x i32>, ptr %ipoffset, align 4
  ret <4 x i32> %0
}

; Stack-slot access: two <4 x i32> stores into a 40-byte alloca (at byte
; offsets 0 and 16), a scalar i32 store of 3 at offset 16, then a vector
; reload from that same offset.
define arm_aapcs_vfpcc <4 x i32> @loadstore_4xi32_stack_off16() {
; CHECK-LE-LABEL: loadstore_4xi32_stack_off16:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .pad #40
; CHECK-LE-NEXT:    sub sp, #40
; CHECK-LE-NEXT:    vmov.i32 q0, #0x1
; CHECK-LE-NEXT:    mov r0, sp
; CHECK-LE-NEXT:    vstrw.32 q0, [r0]
; CHECK-LE-NEXT:    movs r0, #3
; CHECK-LE-NEXT:    vstrw.32 q0, [sp, #16]
; CHECK-LE-NEXT:    str r0, [sp, #16]
; CHECK-LE-NEXT:    vldrw.u32 q0, [sp, #16]
; CHECK-LE-NEXT:    add sp, #40
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: loadstore_4xi32_stack_off16:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .pad #40
; CHECK-BE-NEXT:    sub sp, #40
; CHECK-BE-NEXT:    vmov.i32 q0, #0x1
; CHECK-BE-NEXT:    mov r0, sp
; CHECK-BE-NEXT:    vstrw.32 q0, [r0]
; CHECK-BE-NEXT:    movs r0, #3
; CHECK-BE-NEXT:    vstrw.32 q0, [sp, #16]
; CHECK-BE-NEXT:    str r0, [sp, #16]
; CHECK-BE-NEXT:    vldrb.u8 q1, [sp, #16]
; CHECK-BE-NEXT:    vrev64.8 q0, q1
; CHECK-BE-NEXT:    add sp, #40
; CHECK-BE-NEXT:    bx lr
entry:
  %c = alloca [1 x [5 x [2 x i32]]], align 4
  store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr %c, align 4
  %arrayidx5.2 = getelementptr inbounds [1 x [5 x [2 x i32]]], ptr %c, i32 0, i32 0, i32 2, i32 0
  store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr %arrayidx5.2, align 4
  store i32 3, ptr %arrayidx5.2, align 4
  %0 = load <4 x i32>, ptr %arrayidx5.2, align 4
  ret <4 x i32> %0
}

; Same stack-slot pattern with <8 x i16> vectors, 2-byte alignment and a
; scalar i16 store at byte offset 16.
define arm_aapcs_vfpcc <8 x i16> @loadstore_8xi16_stack_off16() {
; CHECK-LE-LABEL: loadstore_8xi16_stack_off16:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .pad #40
; CHECK-LE-NEXT:    sub sp, #40
; CHECK-LE-NEXT:    vmov.i16 q0, #0x1
; CHECK-LE-NEXT:    mov r0, sp
; CHECK-LE-NEXT:    vstrh.16 q0, [r0]
; CHECK-LE-NEXT:    movs r0, #3
; CHECK-LE-NEXT:    vstrh.16 q0, [sp, #16]
; CHECK-LE-NEXT:    strh.w r0, [sp, #16]
; CHECK-LE-NEXT:    vldrh.u16 q0, [sp, #16]
; CHECK-LE-NEXT:    add sp, #40
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: loadstore_8xi16_stack_off16:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .pad #40
; CHECK-BE-NEXT:    sub sp, #40
; CHECK-BE-NEXT:    vmov.i16 q0, #0x1
; CHECK-BE-NEXT:    mov r0, sp
; CHECK-BE-NEXT:    vstrh.16 q0, [r0]
; CHECK-BE-NEXT:    movs r0, #3
; CHECK-BE-NEXT:    vstrh.16 q0, [sp, #16]
; CHECK-BE-NEXT:    strh.w r0, [sp, #16]
; CHECK-BE-NEXT:    vldrb.u8 q1, [sp, #16]
; CHECK-BE-NEXT:    vrev64.8 q0, q1
; CHECK-BE-NEXT:    add sp, #40
; CHECK-BE-NEXT:    bx lr
entry:
  %c = alloca [1 x [10 x [2 x i16]]], align 2
  store <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, ptr %c, align 2
  %arrayidx5.2 = getelementptr inbounds [1 x [10 x [2 x i16]]], ptr %c, i32 0, i32 0, i32 4, i32 0
  store <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, ptr %arrayidx5.2, align 2
  store i16 3, ptr %arrayidx5.2, align 2
  %0 = load <8 x i16>, ptr %arrayidx5.2, align 2
  ret <8 x i16> %0
}

; Same stack-slot pattern with <16 x i8> vectors, 1-byte alignment and a
; scalar i8 store at byte offset 16.
define arm_aapcs_vfpcc <16 x i8> @loadstore_16xi8_stack_off16() {
; CHECK-LE-LABEL: loadstore_16xi8_stack_off16:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .pad #40
; CHECK-LE-NEXT:    sub sp, #40
; CHECK-LE-NEXT:    vmov.i8 q0, #0x1
; CHECK-LE-NEXT:    mov r0, sp
; CHECK-LE-NEXT:    vstrb.8 q0, [r0]
; CHECK-LE-NEXT:    movs r0, #3
; CHECK-LE-NEXT:    vstrb.8 q0, [sp, #16]
; CHECK-LE-NEXT:    strb.w r0, [sp, #16]
; CHECK-LE-NEXT:    vldrb.u8 q0, [sp, #16]
; CHECK-LE-NEXT:    add sp, #40
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: loadstore_16xi8_stack_off16:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .pad #40
; CHECK-BE-NEXT:    sub sp, #40
; CHECK-BE-NEXT:    vmov.i8 q0, #0x1
; CHECK-BE-NEXT:    mov r0, sp
; CHECK-BE-NEXT:    vstrb.8 q0, [r0]
; CHECK-BE-NEXT:    movs r0, #3
; CHECK-BE-NEXT:    vstrb.8 q0, [sp, #16]
; CHECK-BE-NEXT:    strb.w r0, [sp, #16]
; CHECK-BE-NEXT:    vldrb.u8 q1, [sp, #16]
; CHECK-BE-NEXT:    vrev64.8 q0, q1
; CHECK-BE-NEXT:    add sp, #40
; CHECK-BE-NEXT:    bx lr
entry:
  %c = alloca [1 x [20 x [2 x i8]]], align 1
  store <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, ptr %c, align 1
  %arrayidx5.2 = getelementptr inbounds [1 x [20 x [2 x i8]]], ptr %c, i32 0, i32 0, i32 8, i32 0
  store <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, ptr %arrayidx5.2, align 1
  store i8 3, ptr %arrayidx5.2, align 1
  %0 = load <16 x i8>, ptr %arrayidx5.2, align 1
  ret <16 x i8> %0
}