; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s

define arm_aapcs_vfpcc <4 x i32> @zext_unscaled_i8_i32(ptr %base, ptr %offptr) {
; CHECK-LABEL: zext_unscaled_i8_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrb.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, ptr %offptr, align 4
  %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %gather.zext = zext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}

define arm_aapcs_vfpcc <4 x i32> @sext_unscaled_i8_i32(ptr %base, ptr %offptr) {
; CHECK-LABEL: sext_unscaled_i8_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrb.s32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, ptr %offptr, align 4
  %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %gather.sext = sext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}

define arm_aapcs_vfpcc <4 x i32> @zext_unscaled_i16_i32(ptr %base, ptr %offptr) {
; CHECK-LABEL: zext_unscaled_i16_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrh.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, ptr %offptr, align 4
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.zext = zext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}

define arm_aapcs_vfpcc <4 x i32> @sext_unscaled_i16_i32(ptr %base, ptr %offptr) {
; CHECK-LABEL: sext_unscaled_i16_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrh.s32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, ptr %offptr, align 4
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.sext = sext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}

define arm_aapcs_vfpcc <4 x i32> @unscaled_i32_i32(ptr %base, ptr %offptr) {
; CHECK-LABEL: unscaled_i32_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, ptr %offptr, align 4
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}

define arm_aapcs_vfpcc <4 x float> @unscaled_f32_i32(ptr %base, ptr %offptr) {
; CHECK-LABEL: unscaled_f32_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i32>, ptr %offptr, align 4
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
  ret <4 x float> %gather
}

define arm_aapcs_vfpcc <4 x i32> @unsigned_unscaled_b_i32_i16(ptr %base, ptr %offptr) {
; CHECK-LABEL: unsigned_unscaled_b_i32_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, ptr %offptr, align 2
  %offs.zext = zext <4 x i16> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}

define arm_aapcs_vfpcc <4 x i32> @signed_unscaled_i32_i16(ptr %base, ptr %offptr) {
; CHECK-LABEL: signed_unscaled_i32_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, ptr %offptr, align 2
  %offs.sext = sext <4 x i16> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}

define arm_aapcs_vfpcc <4 x float> @a_unsigned_unscaled_f32_i16(ptr %base, ptr %offptr) {
; CHECK-LABEL: a_unsigned_unscaled_f32_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, ptr %offptr, align 2
  %offs.zext = zext <4 x i16> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
  ret <4 x float> %gather
}

define arm_aapcs_vfpcc <4 x float> @b_signed_unscaled_f32_i16(ptr %base, ptr %offptr) {
; CHECK-LABEL: b_signed_unscaled_f32_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, ptr %offptr, align 2
  %offs.sext = sext <4 x i16> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
  ret <4 x float> %gather
}

define arm_aapcs_vfpcc <4 x i32> @zext_signed_unscaled_i16_i16(ptr %base, ptr %offptr) {
; CHECK-LABEL: zext_signed_unscaled_i16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vldrh.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, ptr %offptr, align 2
  %offs.sext = sext <4 x i16> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.zext = zext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}

define arm_aapcs_vfpcc <4 x i32> @sext_signed_unscaled_i16_i16(ptr %base, ptr %offptr) {
; CHECK-LABEL: sext_signed_unscaled_i16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vldrh.s32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, ptr %offptr, align 2
  %offs.sext = sext <4 x i16> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.sext = sext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}

define arm_aapcs_vfpcc <4 x i32> @zext_unsigned_unscaled_i16_i16(ptr %base, ptr %offptr) {
; CHECK-LABEL: zext_unsigned_unscaled_i16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r1]
; CHECK-NEXT:    vldrh.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, ptr %offptr, align 2
  %offs.zext = zext <4 x i16> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.zext = zext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}

define arm_aapcs_vfpcc <4 x i32> @sext_unsigned_unscaled_i16_i16(ptr %base, ptr %offptr) {
; CHECK-LABEL: sext_unsigned_unscaled_i16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r1]
; CHECK-NEXT:    vldrh.s32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, ptr %offptr, align 2
  %offs.zext = zext <4 x i16> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.sext = sext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}

define arm_aapcs_vfpcc <4 x i32> @zext_signed_unscaled_i8_i16(ptr %base, ptr %offptr) {
; CHECK-LABEL: zext_signed_unscaled_i8_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vldrb.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, ptr %offptr, align 2
  %offs.sext = sext <4 x i16> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %gather.zext = zext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}

define arm_aapcs_vfpcc <4 x i32> @sext_signed_unscaled_i8_i16(ptr %base, ptr %offptr) {
; CHECK-LABEL: sext_signed_unscaled_i8_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r1]
; CHECK-NEXT:    vldrb.s32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, ptr %offptr, align 2
  %offs.sext = sext <4 x i16> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %gather.sext = sext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}

define arm_aapcs_vfpcc <4 x i32> @zext_unsigned_unscaled_i8_i16(ptr %base, ptr %offptr) {
; CHECK-LABEL: zext_unsigned_unscaled_i8_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r1]
; CHECK-NEXT:    vldrb.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, ptr %offptr, align 2
  %offs.zext = zext <4 x i16> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %gather.zext = zext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}

define arm_aapcs_vfpcc <4 x i32> @sext_unsigned_unscaled_i8_i16(ptr %base, ptr %offptr) {
; CHECK-LABEL: sext_unsigned_unscaled_i8_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r1]
; CHECK-NEXT:    vldrb.s32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i16>, ptr %offptr, align 2
  %offs.zext = zext <4 x i16> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %gather.sext = sext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}

define arm_aapcs_vfpcc <4 x i32> @unsigned_unscaled_b_i32_i8(ptr %base, ptr %offptr) {
; CHECK-LABEL: unsigned_unscaled_b_i32_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, ptr %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}

define arm_aapcs_vfpcc <4 x i32> @signed_unscaled_i32_i8(ptr %base, ptr %offptr) {
; CHECK-LABEL: signed_unscaled_i32_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, ptr %offptr, align 1
  %offs.sext = sext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}

define arm_aapcs_vfpcc <4 x float> @a_unsigned_unscaled_f32_i8(ptr %base, ptr %offptr) {
; CHECK-LABEL: a_unsigned_unscaled_f32_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, ptr %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
  ret <4 x float> %gather
}

define arm_aapcs_vfpcc <4 x float> @b_signed_unscaled_f32_i8(ptr %base, ptr %offptr) {
; CHECK-LABEL: b_signed_unscaled_f32_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r1]
; CHECK-NEXT:    vldrw.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, ptr %offptr, align 1
  %offs.sext = sext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
  ret <4 x float> %gather
}

define arm_aapcs_vfpcc <4 x i32> @zext_signed_unscaled_i16_i8(ptr %base, ptr %offptr) {
; CHECK-LABEL: zext_signed_unscaled_i16_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r1]
; CHECK-NEXT:    vldrh.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, ptr %offptr, align 1
  %offs.sext = sext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.zext = zext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}

define arm_aapcs_vfpcc <4 x i32> @sext_signed_unscaled_i16_i8(ptr %base, ptr %offptr) {
; CHECK-LABEL: sext_signed_unscaled_i16_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r1]
; CHECK-NEXT:    vldrh.s32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, ptr %offptr, align 1
  %offs.sext = sext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.sext = sext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}

define arm_aapcs_vfpcc <4 x i32> @zext_unsigned_unscaled_i16_i8(ptr %base, ptr %offptr) {
; CHECK-LABEL: zext_unsigned_unscaled_i16_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r1]
; CHECK-NEXT:    vldrh.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, ptr %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.zext = zext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}

define arm_aapcs_vfpcc <4 x i32> @sext_unsigned_unscaled_i16_i8(ptr %base, ptr %offptr) {
; CHECK-LABEL: sext_unsigned_unscaled_i16_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r1]
; CHECK-NEXT:    vldrh.s32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, ptr %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  %byte_ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
  %ptrs = bitcast <4 x ptr> %byte_ptrs to <4 x ptr>
  %gather = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  %gather.sext = sext <4 x i16> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}

define arm_aapcs_vfpcc <4 x i32> @zext_signed_unscaled_i8_i8(ptr %base, ptr %offptr) {
; CHECK-LABEL: zext_signed_unscaled_i8_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r1]
; CHECK-NEXT:    vldrb.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, ptr %offptr, align 1
  %offs.sext = sext <4 x i8> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %gather.zext = zext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}

define arm_aapcs_vfpcc <4 x i32> @sext_signed_unscaled_i8_i8(ptr %base, ptr %offptr) {
; CHECK-LABEL: sext_signed_unscaled_i8_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r1]
; CHECK-NEXT:    vldrb.s32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, ptr %offptr, align 1
  %offs.sext = sext <4 x i8> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.sext
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %gather.sext = sext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}

define arm_aapcs_vfpcc <4 x i32> @zext_unsigned_unscaled_i8_i8(ptr %base, ptr %offptr) {
; CHECK-LABEL: zext_unsigned_unscaled_i8_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r1]
; CHECK-NEXT:    vldrb.u32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, ptr %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %gather.zext = zext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %gather.zext
}

define arm_aapcs_vfpcc <4 x i32> @sext_unsigned_unscaled_i8_i8(ptr %base, ptr %offptr) {
; CHECK-LABEL: sext_unsigned_unscaled_i8_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r1]
; CHECK-NEXT:    vldrb.s32 q0, [r0, q1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <4 x i8>, ptr %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
  %gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  %gather.sext = sext <4 x i8> %gather to <4 x i32>
  ret <4 x i32> %gather.sext
}

; VLDRW.u32 Qd, [P, 4]
define arm_aapcs_vfpcc <4 x i32> @qi4(<4 x ptr> %p) {
; CHECK-LABEL: qi4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movs r0, #16
; CHECK-NEXT:    vadd.i32 q1, q0, r0
; CHECK-NEXT:    vldrw.u32 q0, [q1]
; CHECK-NEXT:    bx lr
entry:
  %g = getelementptr inbounds i32, <4 x ptr> %p, i32 4
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %g, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}

define arm_aapcs_vfpcc <4 x i32> @qi4_unaligned(<4 x ptr> %p) {
; CHECK-LABEL: qi4_unaligned:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movs r0, #16
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    ldr r0, [r0]
; CHECK-NEXT:    ldr r2, [r2]
; CHECK-NEXT:    ldr r1, [r1]
; CHECK-NEXT:    ldr r3, [r3]
; CHECK-NEXT:    vmov q0[2], q0[0], r2, r0
; CHECK-NEXT:    vmov q0[3], q0[1], r3, r1
; CHECK-NEXT:    bx lr
entry:
  %g = getelementptr inbounds i32, <4 x ptr> %p, i32 4
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %g, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  ret <4 x i32> %gather
}

declare <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i8>)
declare <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i16>)
declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>)
declare <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x half>)
declare <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x float>)