; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s

; Tests for lowering @llvm.masked.gather of 8 x 16-bit elements (i16/f16) on
; MVE, where the pointers are formed as base + offset-vector scaled by the
; element size. The profitable form is a single VLDRH gather with an
; "uxtw #1" scaled offset; cases that cannot use it (sign-extended offsets,
; offsets that overflow 16 bits, mismatched scales) fall back to a
; scalarized sequence of ldrh loads. NOTE(review): CHECK lines are
; autogenerated - regenerate with update_llc_test_checks.py after any change.

; Zero-extended i16 offsets, element size 2: folds to one vldrh.u16 gather
; with the offsets register and "uxtw #1" scaling.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_v8i16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> %offs.zext
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}

; Same pattern but gathering f16 through an i16-typed GEP (the bitcast is a
; no-op with opaque pointers); still a single scaled gather.
define arm_aapcs_vfpcc <8 x half> @scaled_v8f16_i16(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_v8f16_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %i16_ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> %offs.zext
  %ptrs = bitcast <8 x ptr> %i16_ptrs to <8 x ptr>
  %gather = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x half> undef)
  ret <8 x half> %gather
}

; f16 gather where the GEP is directly on half; identical codegen to above.
define arm_aapcs_vfpcc <8 x half> @scaled_v8f16_half(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_v8f16_half:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds half, ptr %base, <8 x i32> %offs.zext
  %gather = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x half> undef)
  ret <8 x half> %gather
}

; Sign-extended offsets: the scaled-gather form only zero-extends (uxtw), so
; the gather is scalarized into address arithmetic plus eight ldrh loads.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_sext(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_v8i16_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vldrh.s32 q0, [r1, #8]
; CHECK-NEXT:    vshl.i32 q0, q0, #1
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r2, r12, d0
; CHECK-NEXT:    vmov r3, lr, d1
; CHECK-NEXT:    vldrh.s32 q0, [r1]
; CHECK-NEXT:    vshl.i32 q0, q0, #1
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r4, r5, d0
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    ldrh r2, [r2]
; CHECK-NEXT:    ldrh.w r12, [r12]
; CHECK-NEXT:    ldrh r3, [r3]
; CHECK-NEXT:    ldrh.w lr, [lr]
; CHECK-NEXT:    ldrh r4, [r4]
; CHECK-NEXT:    ldrh r5, [r5]
; CHECK-NEXT:    vmov.16 q0[0], r4
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    ldrh r1, [r1]
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    vmov.16 q0[3], r1
; CHECK-NEXT:    vmov.16 q0[4], r2
; CHECK-NEXT:    vmov.16 q0[5], r12
; CHECK-NEXT:    vmov.16 q0[6], r3
; CHECK-NEXT:    vmov.16 q0[7], lr
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.sext = sext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> %offs.sext
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}

; Sign-extended offsets gathering f16: scalarized into vldr.16 loads that are
; re-packed into the q register with vins.f16.
define arm_aapcs_vfpcc <8 x half> @scaled_v8f16_sext(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_v8f16_sext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q0, [r1]
; CHECK-NEXT:    vshl.i32 q0, q0, #1
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    vldr.16 s4, [r3]
; CHECK-NEXT:    vldr.16 s0, [r2]
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    vins.f16 s0, s4
; CHECK-NEXT:    vldrh.s32 q1, [r1, #8]
; CHECK-NEXT:    vldr.16 s2, [r3]
; CHECK-NEXT:    vldr.16 s1, [r2]
; CHECK-NEXT:    vshl.i32 q1, q1, #1
; CHECK-NEXT:    vadd.i32 q1, q1, r0
; CHECK-NEXT:    vins.f16 s1, s2
; CHECK-NEXT:    vmov r0, r1, d2
; CHECK-NEXT:    vldr.16 s4, [r1]
; CHECK-NEXT:    vldr.16 s2, [r0]
; CHECK-NEXT:    vmov r0, r1, d3
; CHECK-NEXT:    vins.f16 s2, s4
; CHECK-NEXT:    vldr.16 s4, [r1]
; CHECK-NEXT:    vldr.16 s3, [r0]
; CHECK-NEXT:    vins.f16 s3, s4
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.sext = sext <8 x i16> %offs to <8 x i32>
  %i16_ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> %offs.sext
  %ptrs = bitcast <8 x ptr> %i16_ptrs to <8 x ptr>
  %gather = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x half> undef)
  ret <8 x half> %gather
}

; Zero-extended i8 offsets: the offsets load narrows to vldrb.u16 and still
; feeds the single scaled gather.
define arm_aapcs_vfpcc <8 x i16> @unsigned_scaled_v8i16_i8(ptr %base, ptr %offptr) {
; CHECK-LABEL: unsigned_scaled_v8i16_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i8>, ptr %offptr, align 1
  %offs.zext = zext <8 x i8> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> %offs.zext
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}

; i8 offsets gathering f16: same single-gather lowering as the i16 case.
define arm_aapcs_vfpcc <8 x half> @unsigned_scaled_v8f16_i8(ptr %base, ptr %offptr) {
; CHECK-LABEL: unsigned_scaled_v8f16_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i8>, ptr %offptr, align 1
  %offs.zext = zext <8 x i8> %offs to <8 x i32>
  %i16_ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> %offs.zext
  %ptrs = bitcast <8 x ptr> %i16_ptrs to <8 x ptr>
  %gather = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x half> undef)
  ret <8 x half> %gather
}

; All-true mask with a zero passthru: passthru is irrelevant, so the plain
; unpredicated gather is emitted.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_passthru0t(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_v8i16_i16_passthru0t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> %offs.zext
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> zeroinitializer)
  ret <8 x i16> %gather
}

; All-true mask with a non-zero (splat-1) passthru: still no select needed.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_passthru1t(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_v8i16_i16_passthru1t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> %offs.zext
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %gather
}

; One false lane with non-zero passthru: constant predicate is moved into p0
; via vmsr, the gather is predicated (vldrht under vpst), and vpsel merges in
; the passthru for the inactive lane.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_passthru1f(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_v8i16_i16_passthru1f:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movw r2, #65487
; CHECK-NEXT:    vmov.i16 q0, #0x1
; CHECK-NEXT:    vmsr p0, r2
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q2, [r0, q1, uxtw #1]
; CHECK-NEXT:    vpsel q0, q2, q0
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> %offs.zext
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %gather
}

; One false lane with zero passthru: the predicated gather already zeroes
; inactive lanes, so no vpsel is required.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_passthru0f(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_v8i16_i16_passthru0f:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    movw r2, #65523
; CHECK-NEXT:    vmsr p0, r2
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> %offs.zext
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
  ret <8 x i16> %gather
}

; Mask computed from an icmp with zero passthru: folds to vpt + predicated
; gather, no merge select.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_passthru_icmp0(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_v8i16_i16_passthru_icmp0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vpt.s16 gt, q1, zr
; CHECK-NEXT:    vldrht.u16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> %offs.zext
  %mask = icmp sgt <8 x i16> %offs, zeroinitializer
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %mask, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
  ret <8 x i16> %gather
}

; icmp mask with non-zero passthru: predicated gather plus vpsel merge.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_passthru_icmp1(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_v8i16_i16_passthru_icmp1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r1]
; CHECK-NEXT:    vmov.i16 q0, #0x1
; CHECK-NEXT:    vpt.s16 gt, q1, zr
; CHECK-NEXT:    vldrht.u16 q2, [r0, q1, uxtw #1]
; CHECK-NEXT:    vpsel q0, q2, q0
; CHECK-NEXT:    bx lr
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> %offs.zext
  %mask = icmp sgt <8 x i16> %offs, zeroinitializer
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %mask, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
  ret <8 x i16> %gather
}

; Two chained GEPs with loaded i16 offsets: not recognized as a single scaled
; gather (i16 GEP index implies sign extension), so this scalarizes.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_2gep(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_v8i16_i16_2gep:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vldrh.s32 q0, [r1, #8]
; CHECK-NEXT:    mov.w r12, #40
; CHECK-NEXT:    vshl.i32 q0, q0, #1
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vadd.i32 q0, q0, r12
; CHECK-NEXT:    vmov r3, lr, d0
; CHECK-NEXT:    vmov r2, r4, d1
; CHECK-NEXT:    vldrh.s32 q0, [r1]
; CHECK-NEXT:    vshl.i32 q0, q0, #1
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vadd.i32 q0, q0, r12
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    ldrh.w r12, [lr]
; CHECK-NEXT:    ldrh.w lr, [r4]
; CHECK-NEXT:    vmov r4, r5, d0
; CHECK-NEXT:    ldrh r3, [r3]
; CHECK-NEXT:    ldrh r2, [r2]
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    ldrh r1, [r1]
; CHECK-NEXT:    ldrh r4, [r4]
; CHECK-NEXT:    ldrh r5, [r5]
; CHECK-NEXT:    vmov.16 q0[0], r4
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    vmov.16 q0[3], r1
; CHECK-NEXT:    vmov.16 q0[4], r3
; CHECK-NEXT:    vmov.16 q0[5], r12
; CHECK-NEXT:    vmov.16 q0[6], r2
; CHECK-NEXT:    vmov.16 q0[7], lr
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %offs
  %ptrs2 = getelementptr inbounds i16, <8 x ptr> %ptrs, i16 20
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs2, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}

; Two GEPs with entirely constant offsets: the combined byte offsets are
; materialized as a constant-pool vector feeding an unscaled vldrh gather.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_2gep2(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_v8i16_i16_2gep2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adr r1, .LCPI14_0
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI14_0:
; CHECK-NEXT:    .short 40 @ 0x28
; CHECK-NEXT:    .short 46 @ 0x2e
; CHECK-NEXT:    .short 52 @ 0x34
; CHECK-NEXT:    .short 58 @ 0x3a
; CHECK-NEXT:    .short 64 @ 0x40
; CHECK-NEXT:    .short 70 @ 0x46
; CHECK-NEXT:    .short 76 @ 0x4c
; CHECK-NEXT:    .short 82 @ 0x52
entry:
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> <i16 0, i16 3, i16 6, i16 9, i16 12, i16 15, i16 18, i16 21>
  %ptrs2 = getelementptr inbounds i16,<8 x ptr> %ptrs, i16 20
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs2, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}

; As 2gep2 but with i32 GEP indices; same constant-pool gather lowering.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_biggep(ptr %base) {
; CHECK-LABEL: scaled_v8i16_i16_biggep:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adr r1, .LCPI15_0
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI15_0:
; CHECK-NEXT:    .short 40 @ 0x28
; CHECK-NEXT:    .short 46 @ 0x2e
; CHECK-NEXT:    .short 52 @ 0x34
; CHECK-NEXT:    .short 58 @ 0x3a
; CHECK-NEXT:    .short 64 @ 0x40
; CHECK-NEXT:    .short 70 @ 0x46
; CHECK-NEXT:    .short 76 @ 0x4c
; CHECK-NEXT:    .short 82 @ 0x52
entry:
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
  %ptrs2 = getelementptr inbounds i16,<8 x ptr> %ptrs, i32 20
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs2, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}

; Constant i32 offsets that all fit in 16 bits: element offsets go in the
; pool and the gather keeps the uxtw #1 scaling.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_biggep2(ptr %base) {
; CHECK-LABEL: scaled_v8i16_i16_biggep2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    adr r1, .LCPI16_0
; CHECK-NEXT:    vldrw.u32 q1, [r1]
; CHECK-NEXT:    vldrh.u16 q0, [r0, q1, uxtw #1]
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI16_0:
; CHECK-NEXT:    .short 0 @ 0x0
; CHECK-NEXT:    .short 3 @ 0x3
; CHECK-NEXT:    .short 6 @ 0x6
; CHECK-NEXT:    .short 9 @ 0x9
; CHECK-NEXT:    .short 12 @ 0xc
; CHECK-NEXT:    .short 15 @ 0xf
; CHECK-NEXT:    .short 18 @ 0x12
; CHECK-NEXT:    .short 21 @ 0x15
entry:
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}

; Second GEP adds 65536 elements, pushing byte offsets past 16 bits: the
; gather form cannot encode them and the sequence is scalarized.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_biggep3(ptr %base) {
; CHECK-LABEL: scaled_v8i16_i16_biggep3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    adr r1, .LCPI17_0
; CHECK-NEXT:    adr r2, .LCPI17_1
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r1, lr, d0
; CHECK-NEXT:    vmov r3, r12, d1
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r4, r5, d0
; CHECK-NEXT:    vmov r0, r2, d1
; CHECK-NEXT:    ldrh r1, [r1]
; CHECK-NEXT:    ldrh.w lr, [lr]
; CHECK-NEXT:    ldrh r3, [r3]
; CHECK-NEXT:    ldrh.w r12, [r12]
; CHECK-NEXT:    ldrh r4, [r4]
; CHECK-NEXT:    ldrh r5, [r5]
; CHECK-NEXT:    vmov.16 q0[0], r4
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    ldrh r2, [r2]
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    vmov.16 q0[3], r2
; CHECK-NEXT:    vmov.16 q0[4], r1
; CHECK-NEXT:    vmov.16 q0[5], lr
; CHECK-NEXT:    vmov.16 q0[6], r3
; CHECK-NEXT:    vmov.16 q0[7], r12
; CHECK-NEXT:    pop {r4, r5, r7, pc}
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI17_0:
; CHECK-NEXT:    .long 131096 @ 0x20018
; CHECK-NEXT:    .long 131102 @ 0x2001e
; CHECK-NEXT:    .long 131108 @ 0x20024
; CHECK-NEXT:    .long 131114 @ 0x2002a
; CHECK-NEXT:  .LCPI17_1:
; CHECK-NEXT:    .long 131072 @ 0x20000
; CHECK-NEXT:    .long 131078 @ 0x20006
; CHECK-NEXT:    .long 131084 @ 0x2000c
; CHECK-NEXT:    .long 131090 @ 0x20012
entry:
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
  %ptrs2 = getelementptr inbounds i16,<8 x ptr> %ptrs, i32 65536
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs2, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}

; A single lane offset (65536) exceeds the 16-bit offset range: scalarized.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_biggep4(ptr %base) {
; CHECK-LABEL: scaled_v8i16_i16_biggep4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    adr r1, .LCPI18_0
; CHECK-NEXT:    adr r2, .LCPI18_1
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r1, lr, d0
; CHECK-NEXT:    vmov r3, r12, d1
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r4, r5, d0
; CHECK-NEXT:    vmov r0, r2, d1
; CHECK-NEXT:    ldrh r1, [r1]
; CHECK-NEXT:    ldrh.w lr, [lr]
; CHECK-NEXT:    ldrh r3, [r3]
; CHECK-NEXT:    ldrh.w r12, [r12]
; CHECK-NEXT:    ldrh r4, [r4]
; CHECK-NEXT:    ldrh r5, [r5]
; CHECK-NEXT:    vmov.16 q0[0], r4
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    ldrh r2, [r2]
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    vmov.16 q0[3], r2
; CHECK-NEXT:    vmov.16 q0[4], r1
; CHECK-NEXT:    vmov.16 q0[5], lr
; CHECK-NEXT:    vmov.16 q0[6], r3
; CHECK-NEXT:    vmov.16 q0[7], r12
; CHECK-NEXT:    pop {r4, r5, r7, pc}
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI18_0:
; CHECK-NEXT:    .long 24 @ 0x18
; CHECK-NEXT:    .long 131072 @ 0x20000
; CHECK-NEXT:    .long 36 @ 0x24
; CHECK-NEXT:    .long 42 @ 0x2a
; CHECK-NEXT:  .LCPI18_1:
; CHECK-NEXT:    .long 0 @ 0x0
; CHECK-NEXT:    .long 6 @ 0x6
; CHECK-NEXT:    .long 12 @ 0xc
; CHECK-NEXT:    .long 18 @ 0x12
entry:
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 65536, i32 18, i32 21>
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}

; Base is already a vector of pointers (q0/q1 on entry) with a large uniform
; element offset: no base+offset gather form applies, so scalarized.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_biggep5(<8 x ptr> %base) {
; CHECK-LABEL: scaled_v8i16_i16_biggep5:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    mov.w r12, #131072
; CHECK-NEXT:    vadd.i32 q0, q0, r12
; CHECK-NEXT:    vadd.i32 q1, q1, r12
; CHECK-NEXT:    vmov r4, r5, d0
; CHECK-NEXT:    vmov r0, r12, d1
; CHECK-NEXT:    vmov r3, lr, d3
; CHECK-NEXT:    vmov r1, r2, d2
; CHECK-NEXT:    ldrh r4, [r4]
; CHECK-NEXT:    ldrh r5, [r5]
; CHECK-NEXT:    vmov.16 q0[0], r4
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    ldrh r6, [r3]
; CHECK-NEXT:    ldrh.w r3, [r12]
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    ldrh r1, [r1]
; CHECK-NEXT:    vmov.16 q0[3], r3
; CHECK-NEXT:    ldrh r2, [r2]
; CHECK-NEXT:    vmov.16 q0[4], r1
; CHECK-NEXT:    ldrh.w lr, [lr]
; CHECK-NEXT:    vmov.16 q0[5], r2
; CHECK-NEXT:    vmov.16 q0[6], r6
; CHECK-NEXT:    vmov.16 q0[7], lr
; CHECK-NEXT:    pop {r4, r5, r6, pc}
entry:
  %ptrs2 = getelementptr inbounds i16,<8 x ptr> %base, i32 65536
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs2, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}

; One oversized lane (65536) plus a +1 second GEP: still out of range for the
; gather encoding, so scalarized with combined constant-pool offsets.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_biggep6(ptr %base) {
; CHECK-LABEL: scaled_v8i16_i16_biggep6:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    adr r1, .LCPI20_0
; CHECK-NEXT:    adr r2, .LCPI20_1
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r1, lr, d0
; CHECK-NEXT:    vmov r3, r12, d1
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r4, r5, d0
; CHECK-NEXT:    vmov r0, r2, d1
; CHECK-NEXT:    ldrh r1, [r1]
; CHECK-NEXT:    ldrh.w lr, [lr]
; CHECK-NEXT:    ldrh r3, [r3]
; CHECK-NEXT:    ldrh.w r12, [r12]
; CHECK-NEXT:    ldrh r4, [r4]
; CHECK-NEXT:    ldrh r5, [r5]
; CHECK-NEXT:    vmov.16 q0[0], r4
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    ldrh r2, [r2]
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    vmov.16 q0[3], r2
; CHECK-NEXT:    vmov.16 q0[4], r1
; CHECK-NEXT:    vmov.16 q0[5], lr
; CHECK-NEXT:    vmov.16 q0[6], r3
; CHECK-NEXT:    vmov.16 q0[7], r12
; CHECK-NEXT:    pop {r4, r5, r7, pc}
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI20_0:
; CHECK-NEXT:    .long 131074 @ 0x20002
; CHECK-NEXT:    .long 32 @ 0x20
; CHECK-NEXT:    .long 38 @ 0x26
; CHECK-NEXT:    .long 44 @ 0x2c
; CHECK-NEXT:  .LCPI20_1:
; CHECK-NEXT:    .long 2 @ 0x2
; CHECK-NEXT:    .long 8 @ 0x8
; CHECK-NEXT:    .long 14 @ 0xe
; CHECK-NEXT:    .long 20 @ 0x14
entry:
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 65536, i32 15, i32 18, i32 21>
  %ptrs2 = getelementptr inbounds i16,<8 x ptr> %ptrs, i32 1
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs2, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}

; i16 GEP indices where lane 0 (65000) plus the +600 second GEP wraps in
; 16-bit arithmetic; the combined constants are precomputed and scalarized.
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_biggep7(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_v8i16_i16_biggep7:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    adr r1, .LCPI21_0
; CHECK-NEXT:    adr r2, .LCPI21_1
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r1, lr, d0
; CHECK-NEXT:    vmov r3, r12, d1
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r4, r5, d0
; CHECK-NEXT:    vmov r0, r2, d1
; CHECK-NEXT:    ldrh r1, [r1]
; CHECK-NEXT:    ldrh.w lr, [lr]
; CHECK-NEXT:    ldrh r3, [r3]
; CHECK-NEXT:    ldrh.w r12, [r12]
; CHECK-NEXT:    ldrh r4, [r4]
; CHECK-NEXT:    ldrh r5, [r5]
; CHECK-NEXT:    vmov.16 q0[0], r4
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    ldrh r2, [r2]
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    vmov.16 q0[3], r2
; CHECK-NEXT:    vmov.16 q0[4], r1
; CHECK-NEXT:    vmov.16 q0[5], lr
; CHECK-NEXT:    vmov.16 q0[6], r3
; CHECK-NEXT:    vmov.16 q0[7], r12
; CHECK-NEXT:    pop {r4, r5, r7, pc}
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI21_0:
; CHECK-NEXT:    .long 1224 @ 0x4c8
; CHECK-NEXT:    .long 1230 @ 0x4ce
; CHECK-NEXT:    .long 1236 @ 0x4d4
; CHECK-NEXT:    .long 1242 @ 0x4da
; CHECK-NEXT:  .LCPI21_1:
; CHECK-NEXT:    .long 128 @ 0x80
; CHECK-NEXT:    .long 1206 @ 0x4b6
; CHECK-NEXT:    .long 1212 @ 0x4bc
; CHECK-NEXT:    .long 1218 @ 0x4c2
entry:
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> <i16 65000, i16 3, i16 6, i16 9, i16 12, i16 15, i16 18, i16 21>
  %ptrs2 = getelementptr inbounds i16,<8 x ptr> %ptrs, i16 600
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs2, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}

; GEP is on i32 elements (scale 4) but the gather loads i16: the scale does
; not match the loaded element size, so no scaled-gather form is used and the
; sequence is scalarized (note the vshl #2).
define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_basei32(ptr %base, ptr %offptr) {
; CHECK-LABEL: scaled_v8i16_i16_basei32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vldrh.u32 q0, [r1, #8]
; CHECK-NEXT:    vshl.i32 q0, q0, #2
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r2, r12, d0
; CHECK-NEXT:    vmov r3, lr, d1
; CHECK-NEXT:    vldrh.u32 q0, [r1]
; CHECK-NEXT:    vshl.i32 q0, q0, #2
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vmov r4, r5, d0
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    ldrh r2, [r2]
; CHECK-NEXT:    ldrh.w r12, [r12]
; CHECK-NEXT:    ldrh r3, [r3]
; CHECK-NEXT:    ldrh.w lr, [lr]
; CHECK-NEXT:    ldrh r4, [r4]
; CHECK-NEXT:    ldrh r5, [r5]
; CHECK-NEXT:    vmov.16 q0[0], r4
; CHECK-NEXT:    ldrh r0, [r0]
; CHECK-NEXT:    vmov.16 q0[1], r5
; CHECK-NEXT:    ldrh r1, [r1]
; CHECK-NEXT:    vmov.16 q0[2], r0
; CHECK-NEXT:    vmov.16 q0[3], r1
; CHECK-NEXT:    vmov.16 q0[4], r2
; CHECK-NEXT:    vmov.16 q0[5], r12
; CHECK-NEXT:    vmov.16 q0[6], r3
; CHECK-NEXT:    vmov.16 q0[7], lr
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %offs = load <8 x i16>, ptr %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %offs.zext
  %ptrs.cast = bitcast <8 x ptr> %ptrs to <8 x ptr>
  %gather = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs.cast, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  ret <8 x i16> %gather
}

; Intrinsic declarations used by the tests above.
declare <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i8>) #1
declare <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i16>) #1
declare <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x half>) #1