; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s

; VSTRH.32 Qd, [base, offs, uxtw #1]
define arm_aapcs_vfpcc void @ext_scaled_i16_i32(ptr %base, ptr %offptr, <4 x i32> %input) {
; CHECK-LABEL: ext_scaled_i16_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vstrh.32 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, ptr %offptr, align 4
  %ptrs = getelementptr inbounds i16, ptr %base, <4 x i32> %offs
  %t = trunc <4 x i32> %input to <4 x i16>
  call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %t, <4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRW.32 Qd, [base, offs, uxtw #2]
define arm_aapcs_vfpcc void @scaled_i32_i32(ptr %base, ptr %offptr, <4 x i32> %input) {
; CHECK-LABEL: scaled_i32_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0, q1, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, ptr %offptr, align 4
  %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs
  call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRW.32 Qd, [base, offs, uxtw #2]
define arm_aapcs_vfpcc void @scaled_f32_i32(ptr %base, ptr %offptr, <4 x float> %input) {
; CHECK-LABEL: scaled_f32_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0, q1, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, ptr %offptr, align 4
  %i32_ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs
  %ptrs = bitcast <4 x ptr> %i32_ptrs to <4 x ptr>
  call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRW.32 Qd, [base, offs.zext, uxtw #2]
define arm_aapcs_vfpcc void @unsigned_scaled_b_i32_i16(ptr %base, ptr %offptr, <4 x i32> %input) {
; CHECK-LABEL: unsigned_scaled_b_i32_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0, q1, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, ptr %offptr, align 2
  %offs.zext = zext <4 x i16> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs.zext
  call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRW.32 Qd, [base, offs.sext, uxtw #2]
define arm_aapcs_vfpcc void @signed_scaled_i32_i16(ptr %base, ptr %offptr, <4 x i32> %input) {
; CHECK-LABEL: signed_scaled_i32_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0, q1, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, ptr %offptr, align 2
  %offs.sext = sext <4 x i16> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs.sext
  call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRW.32 Qd, [base, offs.zext, uxtw #2]
define arm_aapcs_vfpcc void @a_unsigned_scaled_f32_i16(ptr %base, ptr %offptr, <4 x float> %input) {
; CHECK-LABEL: a_unsigned_scaled_f32_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0, q1, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, ptr %offptr, align 2
  %offs.zext = zext <4 x i16> %offs to <4 x i32>
  %i32_ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x ptr> %i32_ptrs to <4 x ptr>
  call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRW.32 Qd, [base, offs.sext, uxtw #2]
define arm_aapcs_vfpcc void @b_signed_scaled_f32_i16(ptr %base, ptr %offptr, <4 x float> %input) {
; CHECK-LABEL: b_signed_scaled_f32_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0, q1, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, ptr %offptr, align 2
  %offs.sext = sext <4 x i16> %offs to <4 x i32>
  %i32_ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x ptr> %i32_ptrs to <4 x ptr>
  call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRH.32 Qd, [base, offs.sext, uxtw #1]
define arm_aapcs_vfpcc void @ext_signed_scaled_i16_i16(ptr %base, ptr %offptr, <4 x i32> %input) {
; CHECK-LABEL: ext_signed_scaled_i16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vstrh.32 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, ptr %offptr, align 2
  %offs.sext = sext <4 x i16> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i16, ptr %base, <4 x i32> %offs.sext
  %t = trunc <4 x i32> %input to <4 x i16>
  call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %t, <4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRH.32 Qd, [base, offs.zext, uxtw #1]
define arm_aapcs_vfpcc void @ext_unsigned_scaled_i16_i16(ptr %base, ptr %offptr, <4 x i32> %input) {
; CHECK-LABEL: ext_unsigned_scaled_i16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r1]
; CHECK-NEXT:    vstrh.32 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, ptr %offptr, align 2
  %offs.zext = zext <4 x i16> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i16, ptr %base, <4 x i32> %offs.zext
  %t = trunc <4 x i32> %input to <4 x i16>
  call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %t, <4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRW.32 Qd, [base, offs.zext, uxtw #2]
define arm_aapcs_vfpcc void @unsigned_scaled_b_i32_i8(ptr %base, ptr %offptr, <4 x i32> %input) {
; CHECK-LABEL: unsigned_scaled_b_i32_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0, q1, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, ptr %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs.zext
  call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRW.32 Qd, [base, offs.sext, uxtw #2]
define arm_aapcs_vfpcc void @signed_scaled_i32_i8(ptr %base, ptr %offptr, <4 x i32> %input) {
; CHECK-LABEL: signed_scaled_i32_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0, q1, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, ptr %offptr, align 1
  %offs.sext = sext <4 x i8> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs.sext
  call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRW.32 Qd, [base, offs.zext, uxtw #2]
define arm_aapcs_vfpcc void @a_unsigned_scaled_f32_i8(ptr %base, ptr %offptr, <4 x float> %input) {
; CHECK-LABEL: a_unsigned_scaled_f32_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0, q1, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, ptr %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  %i32_ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x ptr> %i32_ptrs to <4 x ptr>
  call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRW.32 Qd, [base, offs.sext, uxtw #2]
define arm_aapcs_vfpcc void @b_signed_scaled_f32_i8(ptr %base, ptr %offptr, <4 x float> %input) {
; CHECK-LABEL: b_signed_scaled_f32_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0, q1, uxtw #2]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, ptr %offptr, align 1
  %offs.sext = sext <4 x i8> %offs to <4 x i32>
  %i32_ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x ptr> %i32_ptrs to <4 x ptr>
  call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRH.32 Qd, [base, offs.sext, uxtw #1]
define arm_aapcs_vfpcc void @ext_signed_scaled_i16_i8(ptr %base, ptr %offptr, <4 x i32> %input) {
; CHECK-LABEL: ext_signed_scaled_i16_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r1]
; CHECK-NEXT:    vstrh.32 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, ptr %offptr, align 1
  %offs.sext = sext <4 x i8> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i16, ptr %base, <4 x i32> %offs.sext
  %t = trunc <4 x i32> %input to <4 x i16>
  call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %t, <4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; VSTRH.32 Qd, [base, offs.zext, uxtw #1]
define arm_aapcs_vfpcc void @ext_unsigned_scaled_i16_i8(ptr %base, ptr %offptr, <4 x i32> %input) {
; CHECK-LABEL: ext_unsigned_scaled_i16_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r1]
; CHECK-NEXT:    vstrh.32 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, ptr %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i16, ptr %base, <4 x i32> %offs.zext
  %t = trunc <4 x i32> %input to <4 x i16>
  call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %t, <4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

define arm_aapcs_vfpcc void @ext_scaled_i16_i32_2gep(ptr %base, ptr %offptr, <4 x i32> %input) {
; CHECK-LABEL: ext_scaled_i16_i32_2gep:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    movs r2, #10
; CHECK-NEXT:    movs r3, #0
; CHECK-NEXT:    vshl.i32 q1, q1, #1
; CHECK-NEXT:    vadd.i32 q1, q1, r0
; CHECK-NEXT:    vadd.i32 q1, q1, r2
; CHECK-NEXT:    vstrh.32 q0, [r3, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, ptr %offptr, align 4
  %ptrs = getelementptr inbounds i16, ptr %base, <4 x i32> %offs
  %ptrs2 = getelementptr inbounds i16, <4 x ptr> %ptrs, i16 5
  %t = trunc <4 x i32> %input to <4 x i16>
  call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %t, <4 x ptr> %ptrs2, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

define arm_aapcs_vfpcc void @ext_scaled_i16_i32_2gep2(ptr %base, ptr %offptr, <4 x i32> %input) {
; CHECK-LABEL: ext_scaled_i16_i32_2gep2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adr r1, .LCPI16_0
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vstrh.32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI16_0:
; CHECK-NEXT:    .long 10 @ 0xa
; CHECK-NEXT:    .long 16 @ 0x10
; CHECK-NEXT:    .long 22 @ 0x16
; CHECK-NEXT:    .long 28 @ 0x1c
entry:
  %ptrs = getelementptr inbounds i16, ptr %base, <4 x i16> <i16 0, i16 3, i16 6, i16 9>
  %ptrs2 = getelementptr inbounds i16, <4 x ptr> %ptrs, i16 5
  %t = trunc <4 x i32> %input to <4 x i16>
  call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %t, <4 x ptr> %ptrs2, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

declare void @llvm.masked.scatter.v4i8.v4p0(<4 x i8>, <4 x ptr>, i32, <4 x i1>)
declare void @llvm.masked.scatter.v4i16.v4p0(<4 x i16>, <4 x ptr>, i32, <4 x i1>)
declare void @llvm.masked.scatter.v4f16.v4p0(<4 x half>, <4 x ptr>, i32, <4 x i1>)
declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32, <4 x i1>)
declare void @llvm.masked.scatter.v4f32.v4p0(<4 x float>, <4 x ptr>, i32, <4 x i1>)