1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s 3 4define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8(ptr %base, ptr %offptr) { 5; CHECK-LABEL: unscaled_v16i8_i8: 6; CHECK: @ %bb.0: @ %entry 7; CHECK-NEXT: vldrb.u8 q1, [r1] 8; CHECK-NEXT: vldrb.u8 q0, [r0, q1] 9; CHECK-NEXT: bx lr 10entry: 11 %offs = load <16 x i8>, ptr %offptr, align 1 12 %offs.zext = zext <16 x i8> %offs to <16 x i32> 13 %ptrs = getelementptr inbounds i8, ptr %base, <16 x i32> %offs.zext 14 %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %ptrs, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef) 15 ret <16 x i8> %gather 16} 17 18define arm_aapcs_vfpcc <8 x i8> @unscaled_v8i8_i8(ptr %base, ptr %offptr) { 19; CHECK-LABEL: unscaled_v8i8_i8: 20; CHECK: @ %bb.0: @ %entry 21; CHECK-NEXT: vldrb.u16 q1, [r1] 22; CHECK-NEXT: vldrb.u16 q0, [r0, q1] 23; CHECK-NEXT: bx lr 24entry: 25 %offs = load <8 x i8>, ptr %offptr, align 1 26 %offs.zext = zext <8 x i8> %offs to <8 x i32> 27 %ptrs = getelementptr inbounds i8, ptr %base, <8 x i32> %offs.zext 28 %gather = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %ptrs, i32 1, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> undef) 29 ret <8 x i8> %gather 30} 31 32define arm_aapcs_vfpcc <2 x i8> @unscaled_v2i8_i8(ptr %base, ptr %offptr) { 33; CHECK-LABEL: unscaled_v2i8_i8: 34; CHECK: @ %bb.0: @ %entry 35; CHECK-NEXT: ldrb r2, [r1] 36; CHECK-NEXT: vmov.i32 q0, #0xff 37; CHECK-NEXT: ldrb r1, [r1, #1] 38; CHECK-NEXT: vmov q1[2], q1[0], r2, r1 39; CHECK-NEXT: vand q0, q1, q0 40; CHECK-NEXT: vmov r1, s2 41; CHECK-NEXT: vmov r2, s0 42; CHECK-NEXT: ldrb r1, [r0, r1] 43; CHECK-NEXT: ldrb r0, [r0, r2] 44; CHECK-NEXT: vmov q0[2], q0[0], r0, r1 45; CHECK-NEXT: bx lr 
46entry: 47 %offs = load <2 x i8>, ptr %offptr, align 1 48 %offs.zext = zext <2 x i8> %offs to <2 x i32> 49 %ptrs = getelementptr inbounds i8, ptr %base, <2 x i32> %offs.zext 50 %gather = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i8> undef) 51 ret <2 x i8> %gather 52} 53 54define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_sext(ptr %base, ptr %offptr) { 55; CHECK-LABEL: unscaled_v16i8_sext: 56; CHECK: @ %bb.0: @ %entry 57; CHECK-NEXT: .save {r4, r5, r6, r7, lr} 58; CHECK-NEXT: push {r4, r5, r6, r7, lr} 59; CHECK-NEXT: vldrb.s32 q0, [r1, #12] 60; CHECK-NEXT: vadd.i32 q0, q0, r0 61; CHECK-NEXT: vmov r2, r3, d1 62; CHECK-NEXT: vmov r4, r5, d0 63; CHECK-NEXT: vldrb.s32 q0, [r1] 64; CHECK-NEXT: vadd.i32 q2, q0, r0 65; CHECK-NEXT: vldrb.s32 q0, [r1, #8] 66; CHECK-NEXT: vadd.i32 q1, q0, r0 67; CHECK-NEXT: ldrb.w r12, [r2] 68; CHECK-NEXT: ldrb.w lr, [r3] 69; CHECK-NEXT: ldrb r3, [r4] 70; CHECK-NEXT: ldrb r2, [r5] 71; CHECK-NEXT: vmov r4, r5, d4 72; CHECK-NEXT: ldrb r4, [r4] 73; CHECK-NEXT: ldrb r5, [r5] 74; CHECK-NEXT: vmov.8 q0[0], r4 75; CHECK-NEXT: vmov r4, r6, d3 76; CHECK-NEXT: vmov.8 q0[1], r5 77; CHECK-NEXT: ldrb r5, [r4] 78; CHECK-NEXT: ldrb r4, [r6] 79; CHECK-NEXT: vmov r6, r7, d5 80; CHECK-NEXT: vldrb.s32 q2, [r1, #4] 81; CHECK-NEXT: vadd.i32 q2, q2, r0 82; CHECK-NEXT: ldrb r0, [r6] 83; CHECK-NEXT: ldrb r7, [r7] 84; CHECK-NEXT: vmov.8 q0[2], r0 85; CHECK-NEXT: vmov r0, r1, d4 86; CHECK-NEXT: vmov.8 q0[3], r7 87; CHECK-NEXT: ldrb r0, [r0] 88; CHECK-NEXT: ldrb r1, [r1] 89; CHECK-NEXT: vmov.8 q0[4], r0 90; CHECK-NEXT: vmov.8 q0[5], r1 91; CHECK-NEXT: vmov r0, r1, d5 92; CHECK-NEXT: ldrb r0, [r0] 93; CHECK-NEXT: ldrb r1, [r1] 94; CHECK-NEXT: vmov.8 q0[6], r0 95; CHECK-NEXT: vmov.8 q0[7], r1 96; CHECK-NEXT: vmov r0, r1, d2 97; CHECK-NEXT: ldrb r0, [r0] 98; CHECK-NEXT: ldrb r1, [r1] 99; CHECK-NEXT: vmov.8 q0[8], r0 100; CHECK-NEXT: vmov.8 q0[9], r1 101; CHECK-NEXT: vmov.8 q0[10], r5 102; CHECK-NEXT: vmov.8 
q0[11], r4 103; CHECK-NEXT: vmov.8 q0[12], r3 104; CHECK-NEXT: vmov.8 q0[13], r2 105; CHECK-NEXT: vmov.8 q0[14], r12 106; CHECK-NEXT: vmov.8 q0[15], lr 107; CHECK-NEXT: pop {r4, r5, r6, r7, pc} 108entry: 109 %offs = load <16 x i8>, ptr %offptr, align 1 110 %offs.sext = sext <16 x i8> %offs to <16 x i32> 111 %ptrs = getelementptr inbounds i8, ptr %base, <16 x i32> %offs.sext 112 %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %ptrs, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef) 113 ret <16 x i8> %gather 114} 115 116define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i16(ptr %base, ptr %offptr) { 117; CHECK-LABEL: unscaled_v16i8_i16: 118; CHECK: @ %bb.0: @ %entry 119; CHECK-NEXT: .save {r4, r5, r6, r7, lr} 120; CHECK-NEXT: push {r4, r5, r6, r7, lr} 121; CHECK-NEXT: vldrh.s32 q0, [r1, #24] 122; CHECK-NEXT: vadd.i32 q0, q0, r0 123; CHECK-NEXT: vmov r2, r3, d1 124; CHECK-NEXT: vmov r4, r5, d0 125; CHECK-NEXT: vldrh.s32 q0, [r1] 126; CHECK-NEXT: vadd.i32 q2, q0, r0 127; CHECK-NEXT: vldrh.s32 q0, [r1, #16] 128; CHECK-NEXT: vadd.i32 q1, q0, r0 129; CHECK-NEXT: ldrb.w r12, [r2] 130; CHECK-NEXT: ldrb.w lr, [r3] 131; CHECK-NEXT: ldrb r3, [r4] 132; CHECK-NEXT: ldrb r2, [r5] 133; CHECK-NEXT: vmov r4, r5, d4 134; CHECK-NEXT: ldrb r4, [r4] 135; CHECK-NEXT: ldrb r5, [r5] 136; CHECK-NEXT: vmov.8 q0[0], r4 137; CHECK-NEXT: vmov r4, r6, d3 138; CHECK-NEXT: vmov.8 q0[1], r5 139; CHECK-NEXT: ldrb r5, [r4] 140; CHECK-NEXT: ldrb r4, [r6] 141; CHECK-NEXT: vmov r6, r7, d5 142; CHECK-NEXT: vldrh.s32 q2, [r1, #8] 143; CHECK-NEXT: vadd.i32 q2, q2, r0 144; CHECK-NEXT: ldrb r0, [r6] 145; CHECK-NEXT: ldrb r7, [r7] 146; CHECK-NEXT: vmov.8 q0[2], r0 147; CHECK-NEXT: vmov r0, r1, d4 148; CHECK-NEXT: vmov.8 q0[3], r7 149; CHECK-NEXT: ldrb r0, [r0] 150; CHECK-NEXT: ldrb r1, [r1] 151; CHECK-NEXT: vmov.8 q0[4], r0 152; CHECK-NEXT: vmov.8 q0[5], r1 153; 
CHECK-NEXT: vmov r0, r1, d5 154; CHECK-NEXT: ldrb r0, [r0] 155; CHECK-NEXT: ldrb r1, [r1] 156; CHECK-NEXT: vmov.8 q0[6], r0 157; CHECK-NEXT: vmov.8 q0[7], r1 158; CHECK-NEXT: vmov r0, r1, d2 159; CHECK-NEXT: ldrb r0, [r0] 160; CHECK-NEXT: ldrb r1, [r1] 161; CHECK-NEXT: vmov.8 q0[8], r0 162; CHECK-NEXT: vmov.8 q0[9], r1 163; CHECK-NEXT: vmov.8 q0[10], r5 164; CHECK-NEXT: vmov.8 q0[11], r4 165; CHECK-NEXT: vmov.8 q0[12], r3 166; CHECK-NEXT: vmov.8 q0[13], r2 167; CHECK-NEXT: vmov.8 q0[14], r12 168; CHECK-NEXT: vmov.8 q0[15], lr 169; CHECK-NEXT: pop {r4, r5, r6, r7, pc} 170entry: 171 %offs = load <16 x i16>, ptr %offptr, align 2 172 %offs.sext = sext <16 x i16> %offs to <16 x i32> 173 %ptrs = getelementptr inbounds i8, ptr %base, <16 x i32> %offs.sext 174 %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %ptrs, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef) 175 ret <16 x i8> %gather 176} 177 178define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_scaled(ptr %base, ptr %offptr) { 179; CHECK-LABEL: unscaled_v16i8_scaled: 180; CHECK: @ %bb.0: @ %entry 181; CHECK-NEXT: .save {r4, r5, r6, r7, lr} 182; CHECK-NEXT: push {r4, r5, r6, r7, lr} 183; CHECK-NEXT: vldrb.u32 q0, [r1, #12] 184; CHECK-NEXT: vshl.i32 q0, q0, #2 185; CHECK-NEXT: vadd.i32 q0, q0, r0 186; CHECK-NEXT: vmov r2, r3, d1 187; CHECK-NEXT: vmov r4, r5, d0 188; CHECK-NEXT: vldrb.u32 q0, [r1] 189; CHECK-NEXT: vshl.i32 q0, q0, #2 190; CHECK-NEXT: vadd.i32 q2, q0, r0 191; CHECK-NEXT: vldrb.u32 q0, [r1, #8] 192; CHECK-NEXT: vshl.i32 q0, q0, #2 193; CHECK-NEXT: vadd.i32 q1, q0, r0 194; CHECK-NEXT: ldrb.w r12, [r2] 195; CHECK-NEXT: ldrb.w lr, [r3] 196; CHECK-NEXT: ldrb r3, [r4] 197; CHECK-NEXT: ldrb r2, [r5] 198; CHECK-NEXT: vmov r4, r5, d4 199; CHECK-NEXT: ldrb r4, [r4] 200; CHECK-NEXT: ldrb r5, [r5] 201; CHECK-NEXT: vmov.8 q0[0], r4 202; CHECK-NEXT: vmov r4, r6, d3 203; 
CHECK-NEXT: vmov.8 q0[1], r5 204; CHECK-NEXT: ldrb r5, [r4] 205; CHECK-NEXT: ldrb r4, [r6] 206; CHECK-NEXT: vmov r6, r7, d5 207; CHECK-NEXT: vldrb.u32 q2, [r1, #4] 208; CHECK-NEXT: vshl.i32 q2, q2, #2 209; CHECK-NEXT: vadd.i32 q2, q2, r0 210; CHECK-NEXT: ldrb r0, [r6] 211; CHECK-NEXT: ldrb r7, [r7] 212; CHECK-NEXT: vmov.8 q0[2], r0 213; CHECK-NEXT: vmov r0, r1, d4 214; CHECK-NEXT: vmov.8 q0[3], r7 215; CHECK-NEXT: ldrb r0, [r0] 216; CHECK-NEXT: ldrb r1, [r1] 217; CHECK-NEXT: vmov.8 q0[4], r0 218; CHECK-NEXT: vmov.8 q0[5], r1 219; CHECK-NEXT: vmov r0, r1, d5 220; CHECK-NEXT: ldrb r0, [r0] 221; CHECK-NEXT: ldrb r1, [r1] 222; CHECK-NEXT: vmov.8 q0[6], r0 223; CHECK-NEXT: vmov.8 q0[7], r1 224; CHECK-NEXT: vmov r0, r1, d2 225; CHECK-NEXT: ldrb r0, [r0] 226; CHECK-NEXT: ldrb r1, [r1] 227; CHECK-NEXT: vmov.8 q0[8], r0 228; CHECK-NEXT: vmov.8 q0[9], r1 229; CHECK-NEXT: vmov.8 q0[10], r5 230; CHECK-NEXT: vmov.8 q0[11], r4 231; CHECK-NEXT: vmov.8 q0[12], r3 232; CHECK-NEXT: vmov.8 q0[13], r2 233; CHECK-NEXT: vmov.8 q0[14], r12 234; CHECK-NEXT: vmov.8 q0[15], lr 235; CHECK-NEXT: pop {r4, r5, r6, r7, pc} 236entry: 237 %offs = load <16 x i8>, ptr %offptr, align 4 238 %offs.zext = zext <16 x i8> %offs to <16 x i32> 239 %ptrs32 = getelementptr inbounds i32, ptr %base, <16 x i32> %offs.zext 240 %ptrs = bitcast <16 x ptr> %ptrs32 to <16 x ptr> 241 %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %ptrs, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef) 242 ret <16 x i8> %gather 243} 244 245define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_next(ptr %base, ptr %offptr) { 246; CHECK-LABEL: unscaled_v16i8_i8_next: 247; CHECK: @ %bb.0: @ %entry 248; CHECK-NEXT: .save {r4, r5, r6, r7, lr} 249; CHECK-NEXT: push {r4, r5, r6, r7, lr} 250; CHECK-NEXT: vldrw.u32 q0, [r1, #48] 251; CHECK-NEXT: vadd.i32 q0, q0, r0 252; CHECK-NEXT: vmov r2, r3, d1 
253; CHECK-NEXT: vmov r4, r5, d0 254; CHECK-NEXT: vldrw.u32 q0, [r1] 255; CHECK-NEXT: vadd.i32 q2, q0, r0 256; CHECK-NEXT: vldrw.u32 q0, [r1, #32] 257; CHECK-NEXT: vadd.i32 q1, q0, r0 258; CHECK-NEXT: ldrb.w r12, [r2] 259; CHECK-NEXT: ldrb.w lr, [r3] 260; CHECK-NEXT: ldrb r3, [r4] 261; CHECK-NEXT: ldrb r2, [r5] 262; CHECK-NEXT: vmov r4, r5, d4 263; CHECK-NEXT: ldrb r4, [r4] 264; CHECK-NEXT: ldrb r5, [r5] 265; CHECK-NEXT: vmov.8 q0[0], r4 266; CHECK-NEXT: vmov r4, r6, d3 267; CHECK-NEXT: vmov.8 q0[1], r5 268; CHECK-NEXT: ldrb r5, [r4] 269; CHECK-NEXT: ldrb r4, [r6] 270; CHECK-NEXT: vmov r6, r7, d5 271; CHECK-NEXT: vldrw.u32 q2, [r1, #16] 272; CHECK-NEXT: vadd.i32 q2, q2, r0 273; CHECK-NEXT: ldrb r0, [r6] 274; CHECK-NEXT: ldrb r7, [r7] 275; CHECK-NEXT: vmov.8 q0[2], r0 276; CHECK-NEXT: vmov r0, r1, d4 277; CHECK-NEXT: vmov.8 q0[3], r7 278; CHECK-NEXT: ldrb r0, [r0] 279; CHECK-NEXT: ldrb r1, [r1] 280; CHECK-NEXT: vmov.8 q0[4], r0 281; CHECK-NEXT: vmov.8 q0[5], r1 282; CHECK-NEXT: vmov r0, r1, d5 283; CHECK-NEXT: ldrb r0, [r0] 284; CHECK-NEXT: ldrb r1, [r1] 285; CHECK-NEXT: vmov.8 q0[6], r0 286; CHECK-NEXT: vmov.8 q0[7], r1 287; CHECK-NEXT: vmov r0, r1, d2 288; CHECK-NEXT: ldrb r0, [r0] 289; CHECK-NEXT: ldrb r1, [r1] 290; CHECK-NEXT: vmov.8 q0[8], r0 291; CHECK-NEXT: vmov.8 q0[9], r1 292; CHECK-NEXT: vmov.8 q0[10], r5 293; CHECK-NEXT: vmov.8 q0[11], r4 294; CHECK-NEXT: vmov.8 q0[12], r3 295; CHECK-NEXT: vmov.8 q0[13], r2 296; CHECK-NEXT: vmov.8 q0[14], r12 297; CHECK-NEXT: vmov.8 q0[15], lr 298; CHECK-NEXT: pop {r4, r5, r6, r7, pc} 299entry: 300 %offs = load <16 x i32>, ptr %offptr, align 4 301 %ptrs = getelementptr inbounds i8, ptr %base, <16 x i32> %offs 302 %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %ptrs, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef) 303 ret <16 x i8> %gather 304} 305 306define 
arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_2gep(ptr %base, ptr %offptr) { 307; CHECK-LABEL: unscaled_v16i8_i8_2gep: 308; CHECK: @ %bb.0: @ %entry 309; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} 310; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} 311; CHECK-NEXT: vldrb.s32 q0, [r1, #12] 312; CHECK-NEXT: movs r6, #5 313; CHECK-NEXT: vldrb.s32 q1, [r1, #8] 314; CHECK-NEXT: vadd.i32 q0, q0, r0 315; CHECK-NEXT: vadd.i32 q0, q0, r6 316; CHECK-NEXT: vadd.i32 q1, q1, r0 317; CHECK-NEXT: vmov r2, r3, d1 318; CHECK-NEXT: vadd.i32 q1, q1, r6 319; CHECK-NEXT: vmov r4, r5, d0 320; CHECK-NEXT: vldrb.s32 q0, [r1] 321; CHECK-NEXT: vadd.i32 q0, q0, r0 322; CHECK-NEXT: vadd.i32 q2, q0, r6 323; CHECK-NEXT: ldrb.w lr, [r3] 324; CHECK-NEXT: ldrb r3, [r4] 325; CHECK-NEXT: ldrb.w r8, [r5] 326; CHECK-NEXT: vmov r4, r5, d4 327; CHECK-NEXT: ldrb.w r12, [r2] 328; CHECK-NEXT: ldrb r4, [r4] 329; CHECK-NEXT: vmov.8 q0[0], r4 330; CHECK-NEXT: ldrb r4, [r5] 331; CHECK-NEXT: vmov.8 q0[1], r4 332; CHECK-NEXT: vmov r4, r7, d3 333; CHECK-NEXT: ldrb r5, [r4] 334; CHECK-NEXT: ldrb r4, [r7] 335; CHECK-NEXT: vmov r7, r2, d5 336; CHECK-NEXT: vldrb.s32 q2, [r1, #4] 337; CHECK-NEXT: vadd.i32 q2, q2, r0 338; CHECK-NEXT: vadd.i32 q2, q2, r6 339; CHECK-NEXT: ldrb r0, [r7] 340; CHECK-NEXT: ldrb r2, [r2] 341; CHECK-NEXT: vmov.8 q0[2], r0 342; CHECK-NEXT: vmov r0, r1, d4 343; CHECK-NEXT: vmov.8 q0[3], r2 344; CHECK-NEXT: ldrb r0, [r0] 345; CHECK-NEXT: ldrb r1, [r1] 346; CHECK-NEXT: vmov.8 q0[4], r0 347; CHECK-NEXT: vmov.8 q0[5], r1 348; CHECK-NEXT: vmov r0, r1, d5 349; CHECK-NEXT: ldrb r0, [r0] 350; CHECK-NEXT: ldrb r1, [r1] 351; CHECK-NEXT: vmov.8 q0[6], r0 352; CHECK-NEXT: vmov.8 q0[7], r1 353; CHECK-NEXT: vmov r0, r1, d2 354; CHECK-NEXT: ldrb r0, [r0] 355; CHECK-NEXT: ldrb r1, [r1] 356; CHECK-NEXT: vmov.8 q0[8], r0 357; CHECK-NEXT: vmov.8 q0[9], r1 358; CHECK-NEXT: vmov.8 q0[10], r5 359; CHECK-NEXT: vmov.8 q0[11], r4 360; CHECK-NEXT: vmov.8 q0[12], r3 361; CHECK-NEXT: vmov.8 q0[13], r8 362; CHECK-NEXT: vmov.8 
q0[14], r12 363; CHECK-NEXT: vmov.8 q0[15], lr 364; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} 365entry: 366 %offs = load <16 x i8>, ptr %offptr, align 1 367 %ptrs = getelementptr inbounds i8, ptr %base, <16 x i8> %offs 368 %ptrs2 = getelementptr inbounds i8, <16 x ptr> %ptrs, i8 5 369 %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %ptrs2, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef) 370 ret <16 x i8> %gather 371} 372 373 374define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_2gep2(ptr %base, ptr %offptr) { 375; CHECK-LABEL: unscaled_v16i8_i8_2gep2: 376; CHECK: @ %bb.0: @ %entry 377; CHECK-NEXT: adr r1, .LCPI8_0 378; CHECK-NEXT: vldrw.u32 q1, [r1] 379; CHECK-NEXT: vldrb.u8 q0, [r0, q1] 380; CHECK-NEXT: bx lr 381; CHECK-NEXT: .p2align 4 382; CHECK-NEXT: @ %bb.1: 383; CHECK-NEXT: .LCPI8_0: 384; CHECK-NEXT: .byte 5 @ 0x5 385; CHECK-NEXT: .byte 8 @ 0x8 386; CHECK-NEXT: .byte 11 @ 0xb 387; CHECK-NEXT: .byte 14 @ 0xe 388; CHECK-NEXT: .byte 17 @ 0x11 389; CHECK-NEXT: .byte 20 @ 0x14 390; CHECK-NEXT: .byte 23 @ 0x17 391; CHECK-NEXT: .byte 26 @ 0x1a 392; CHECK-NEXT: .byte 29 @ 0x1d 393; CHECK-NEXT: .byte 32 @ 0x20 394; CHECK-NEXT: .byte 35 @ 0x23 395; CHECK-NEXT: .byte 38 @ 0x26 396; CHECK-NEXT: .byte 41 @ 0x29 397; CHECK-NEXT: .byte 44 @ 0x2c 398; CHECK-NEXT: .byte 47 @ 0x2f 399; CHECK-NEXT: .byte 50 @ 0x32 400entry: 401 %ptrs = getelementptr inbounds i8, ptr %base, <16 x i8> <i8 0, i8 3, i8 6, i8 9, i8 12, i8 15, i8 18, i8 21, i8 24, i8 27, i8 30, i8 33, i8 36, i8 39, i8 42, i8 45> 402 %ptrs2 = getelementptr inbounds i8, <16 x ptr> %ptrs, i8 5 403 %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %ptrs2, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef) 404 ret <16 x i8> 
%gather 405} 406 407 408define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_biggep(ptr %base) { 409; CHECK-LABEL: unscaled_v16i8_i8_biggep: 410; CHECK: @ %bb.0: @ %entry 411; CHECK-NEXT: adr r1, .LCPI9_0 412; CHECK-NEXT: vldrw.u32 q1, [r1] 413; CHECK-NEXT: vldrb.u8 q0, [r0, q1] 414; CHECK-NEXT: bx lr 415; CHECK-NEXT: .p2align 4 416; CHECK-NEXT: @ %bb.1: 417; CHECK-NEXT: .LCPI9_0: 418; CHECK-NEXT: .byte 5 @ 0x5 419; CHECK-NEXT: .byte 8 @ 0x8 420; CHECK-NEXT: .byte 11 @ 0xb 421; CHECK-NEXT: .byte 14 @ 0xe 422; CHECK-NEXT: .byte 17 @ 0x11 423; CHECK-NEXT: .byte 20 @ 0x14 424; CHECK-NEXT: .byte 23 @ 0x17 425; CHECK-NEXT: .byte 26 @ 0x1a 426; CHECK-NEXT: .byte 29 @ 0x1d 427; CHECK-NEXT: .byte 32 @ 0x20 428; CHECK-NEXT: .byte 35 @ 0x23 429; CHECK-NEXT: .byte 38 @ 0x26 430; CHECK-NEXT: .byte 41 @ 0x29 431; CHECK-NEXT: .byte 44 @ 0x2c 432; CHECK-NEXT: .byte 47 @ 0x2f 433; CHECK-NEXT: .byte 50 @ 0x32 434entry: 435 %ptrs = getelementptr inbounds i8, ptr %base, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45> 436 %ptrs2 = getelementptr inbounds i8, <16 x ptr> %ptrs, i32 5 437 %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %ptrs2, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef) 438 ret <16 x i8> %gather 439} 440 441 442define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_biggep2(ptr %base) { 443; CHECK-LABEL: unscaled_v16i8_i8_biggep2: 444; CHECK: @ %bb.0: @ %entry 445; CHECK-NEXT: adr r1, .LCPI10_0 446; CHECK-NEXT: vldrw.u32 q1, [r1] 447; CHECK-NEXT: vldrb.u8 q0, [r0, q1] 448; CHECK-NEXT: bx lr 449; CHECK-NEXT: .p2align 4 450; CHECK-NEXT: @ %bb.1: 451; CHECK-NEXT: .LCPI10_0: 452; CHECK-NEXT: .byte 0 @ 0x0 453; CHECK-NEXT: .byte 3 @ 0x3 454; CHECK-NEXT: .byte 6 @ 0x6 455; CHECK-NEXT: .byte 9 @ 0x9 456; CHECK-NEXT: .byte 12 @ 0xc 457; 
CHECK-NEXT: .byte 15 @ 0xf 458; CHECK-NEXT: .byte 18 @ 0x12 459; CHECK-NEXT: .byte 21 @ 0x15 460; CHECK-NEXT: .byte 24 @ 0x18 461; CHECK-NEXT: .byte 27 @ 0x1b 462; CHECK-NEXT: .byte 30 @ 0x1e 463; CHECK-NEXT: .byte 33 @ 0x21 464; CHECK-NEXT: .byte 36 @ 0x24 465; CHECK-NEXT: .byte 39 @ 0x27 466; CHECK-NEXT: .byte 42 @ 0x2a 467; CHECK-NEXT: .byte 45 @ 0x2d 468entry: 469 %ptrs = getelementptr inbounds i8, ptr %base, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45> 470 %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %ptrs, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef) 471 ret <16 x i8> %gather 472} 473 474 475define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_biggep3(ptr %base) { 476; CHECK-LABEL: unscaled_v16i8_i8_biggep3: 477; CHECK: @ %bb.0: @ %entry 478; CHECK-NEXT: .save {r4, r5, r6, r7, lr} 479; CHECK-NEXT: push {r4, r5, r6, r7, lr} 480; CHECK-NEXT: adr r1, .LCPI11_0 481; CHECK-NEXT: adr r4, .LCPI11_1 482; CHECK-NEXT: vldrw.u32 q0, [r1] 483; CHECK-NEXT: adr r7, .LCPI11_3 484; CHECK-NEXT: vadd.i32 q0, q0, r0 485; CHECK-NEXT: vmov r1, r2, d1 486; CHECK-NEXT: vmov r3, r5, d0 487; CHECK-NEXT: vldrw.u32 q0, [r4] 488; CHECK-NEXT: vadd.i32 q1, q0, r0 489; CHECK-NEXT: vmov r4, r6, d3 490; CHECK-NEXT: ldrb.w lr, [r1] 491; CHECK-NEXT: adr r1, .LCPI11_2 492; CHECK-NEXT: vldrw.u32 q0, [r1] 493; CHECK-NEXT: ldrb.w r12, [r2] 494; CHECK-NEXT: ldrb r1, [r5] 495; CHECK-NEXT: vadd.i32 q2, q0, r0 496; CHECK-NEXT: ldrb r3, [r3] 497; CHECK-NEXT: ldrb r2, [r6] 498; CHECK-NEXT: vmov r5, r6, d4 499; CHECK-NEXT: ldrb r4, [r4] 500; CHECK-NEXT: ldrb r5, [r5] 501; CHECK-NEXT: vmov.8 q0[0], r5 502; CHECK-NEXT: ldrb r5, [r6] 503; CHECK-NEXT: vmov.8 q0[1], r5 504; CHECK-NEXT: vmov r5, r6, d5 505; CHECK-NEXT: vldrw.u32 q2, [r7] 506; CHECK-NEXT: vadd.i32 q2, 
q2, r0 507; CHECK-NEXT: ldrb r0, [r5] 508; CHECK-NEXT: ldrb r6, [r6] 509; CHECK-NEXT: vmov.8 q0[2], r0 510; CHECK-NEXT: vmov r0, r5, d4 511; CHECK-NEXT: vmov.8 q0[3], r6 512; CHECK-NEXT: ldrb r0, [r0] 513; CHECK-NEXT: ldrb r5, [r5] 514; CHECK-NEXT: vmov.8 q0[4], r0 515; CHECK-NEXT: vmov.8 q0[5], r5 516; CHECK-NEXT: vmov r0, r5, d5 517; CHECK-NEXT: ldrb r0, [r0] 518; CHECK-NEXT: ldrb r5, [r5] 519; CHECK-NEXT: vmov.8 q0[6], r0 520; CHECK-NEXT: vmov.8 q0[7], r5 521; CHECK-NEXT: vmov r0, r5, d2 522; CHECK-NEXT: ldrb r0, [r0] 523; CHECK-NEXT: ldrb r5, [r5] 524; CHECK-NEXT: vmov.8 q0[8], r0 525; CHECK-NEXT: vmov.8 q0[9], r5 526; CHECK-NEXT: vmov.8 q0[10], r4 527; CHECK-NEXT: vmov.8 q0[11], r2 528; CHECK-NEXT: vmov.8 q0[12], r3 529; CHECK-NEXT: vmov.8 q0[13], r1 530; CHECK-NEXT: vmov.8 q0[14], lr 531; CHECK-NEXT: vmov.8 q0[15], r12 532; CHECK-NEXT: pop {r4, r5, r6, r7, pc} 533; CHECK-NEXT: .p2align 4 534; CHECK-NEXT: @ %bb.1: 535; CHECK-NEXT: .LCPI11_0: 536; CHECK-NEXT: .long 292 @ 0x124 537; CHECK-NEXT: .long 295 @ 0x127 538; CHECK-NEXT: .long 298 @ 0x12a 539; CHECK-NEXT: .long 301 @ 0x12d 540; CHECK-NEXT: .LCPI11_1: 541; CHECK-NEXT: .long 280 @ 0x118 542; CHECK-NEXT: .long 283 @ 0x11b 543; CHECK-NEXT: .long 286 @ 0x11e 544; CHECK-NEXT: .long 289 @ 0x121 545; CHECK-NEXT: .LCPI11_2: 546; CHECK-NEXT: .long 256 @ 0x100 547; CHECK-NEXT: .long 259 @ 0x103 548; CHECK-NEXT: .long 262 @ 0x106 549; CHECK-NEXT: .long 265 @ 0x109 550; CHECK-NEXT: .LCPI11_3: 551; CHECK-NEXT: .long 268 @ 0x10c 552; CHECK-NEXT: .long 271 @ 0x10f 553; CHECK-NEXT: .long 274 @ 0x112 554; CHECK-NEXT: .long 277 @ 0x115 555entry: 556 %ptrs = getelementptr inbounds i8, ptr %base, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45> 557 %ptrs2 = getelementptr inbounds i8, <16 x ptr> %ptrs, i32 256 558 %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %ptrs2, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 
true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef) 559 ret <16 x i8> %gather 560} 561 562 563define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_biggep4(ptr %base) { 564; CHECK-LABEL: unscaled_v16i8_i8_biggep4: 565; CHECK: @ %bb.0: @ %entry 566; CHECK-NEXT: .save {r4, r5, r6, r7, lr} 567; CHECK-NEXT: push {r4, r5, r6, r7, lr} 568; CHECK-NEXT: adr r1, .LCPI12_0 569; CHECK-NEXT: adr r4, .LCPI12_1 570; CHECK-NEXT: vldrw.u32 q0, [r1] 571; CHECK-NEXT: adr r7, .LCPI12_3 572; CHECK-NEXT: vadd.i32 q0, q0, r0 573; CHECK-NEXT: vmov r1, r2, d1 574; CHECK-NEXT: vmov r3, r5, d0 575; CHECK-NEXT: vldrw.u32 q0, [r4] 576; CHECK-NEXT: vadd.i32 q1, q0, r0 577; CHECK-NEXT: vmov r4, r6, d3 578; CHECK-NEXT: ldrb.w lr, [r1] 579; CHECK-NEXT: adr r1, .LCPI12_2 580; CHECK-NEXT: vldrw.u32 q0, [r1] 581; CHECK-NEXT: ldrb.w r12, [r2] 582; CHECK-NEXT: ldrb r1, [r5] 583; CHECK-NEXT: vadd.i32 q2, q0, r0 584; CHECK-NEXT: ldrb r3, [r3] 585; CHECK-NEXT: ldrb r2, [r6] 586; CHECK-NEXT: vmov r5, r6, d4 587; CHECK-NEXT: ldrb r4, [r4] 588; CHECK-NEXT: ldrb r5, [r5] 589; CHECK-NEXT: vmov.8 q0[0], r5 590; CHECK-NEXT: ldrb r5, [r6] 591; CHECK-NEXT: vmov.8 q0[1], r5 592; CHECK-NEXT: vmov r5, r6, d5 593; CHECK-NEXT: vldrw.u32 q2, [r7] 594; CHECK-NEXT: vadd.i32 q2, q2, r0 595; CHECK-NEXT: ldrb r0, [r5] 596; CHECK-NEXT: ldrb r6, [r6] 597; CHECK-NEXT: vmov.8 q0[2], r0 598; CHECK-NEXT: vmov r0, r5, d4 599; CHECK-NEXT: vmov.8 q0[3], r6 600; CHECK-NEXT: ldrb r0, [r0] 601; CHECK-NEXT: ldrb r5, [r5] 602; CHECK-NEXT: vmov.8 q0[4], r0 603; CHECK-NEXT: vmov.8 q0[5], r5 604; CHECK-NEXT: vmov r0, r5, d5 605; CHECK-NEXT: ldrb r0, [r0] 606; CHECK-NEXT: ldrb r5, [r5] 607; CHECK-NEXT: vmov.8 q0[6], r0 608; CHECK-NEXT: vmov.8 q0[7], r5 609; CHECK-NEXT: vmov r0, r5, d2 610; CHECK-NEXT: ldrb r0, [r0] 611; CHECK-NEXT: ldrb r5, [r5] 612; CHECK-NEXT: vmov.8 q0[8], r0 613; CHECK-NEXT: vmov.8 q0[9], r5 614; CHECK-NEXT: vmov.8 q0[10], r4 615; CHECK-NEXT: 
vmov.8 q0[11], r2 616; CHECK-NEXT: vmov.8 q0[12], r3 617; CHECK-NEXT: vmov.8 q0[13], r1 618; CHECK-NEXT: vmov.8 q0[14], lr 619; CHECK-NEXT: vmov.8 q0[15], r12 620; CHECK-NEXT: pop {r4, r5, r6, r7, pc} 621; CHECK-NEXT: .p2align 4 622; CHECK-NEXT: @ %bb.1: 623; CHECK-NEXT: .LCPI12_0: 624; CHECK-NEXT: .long 36 @ 0x24 625; CHECK-NEXT: .long 39 @ 0x27 626; CHECK-NEXT: .long 42 @ 0x2a 627; CHECK-NEXT: .long 45 @ 0x2d 628; CHECK-NEXT: .LCPI12_1: 629; CHECK-NEXT: .long 256 @ 0x100 630; CHECK-NEXT: .long 27 @ 0x1b 631; CHECK-NEXT: .long 30 @ 0x1e 632; CHECK-NEXT: .long 33 @ 0x21 633; CHECK-NEXT: .LCPI12_2: 634; CHECK-NEXT: .long 0 @ 0x0 635; CHECK-NEXT: .long 3 @ 0x3 636; CHECK-NEXT: .long 6 @ 0x6 637; CHECK-NEXT: .long 9 @ 0x9 638; CHECK-NEXT: .LCPI12_3: 639; CHECK-NEXT: .long 12 @ 0xc 640; CHECK-NEXT: .long 15 @ 0xf 641; CHECK-NEXT: .long 18 @ 0x12 642; CHECK-NEXT: .long 21 @ 0x15 643entry: 644 %ptrs = getelementptr inbounds i8, ptr %base, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 256, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45> 645 %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %ptrs, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef) 646 ret <16 x i8> %gather 647} 648 649 650define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_biggep5(<16 x ptr> %base) { 651; CHECK-LABEL: unscaled_v16i8_i8_biggep5: 652; CHECK: @ %bb.0: @ %entry 653; CHECK-NEXT: .save {r4, r5, r6, r7, lr} 654; CHECK-NEXT: push {r4, r5, r6, r7, lr} 655; CHECK-NEXT: mov.w r4, #256 656; CHECK-NEXT: vadd.i32 q3, q3, r4 657; CHECK-NEXT: vadd.i32 q2, q2, r4 658; CHECK-NEXT: vmov r3, r2, d7 659; CHECK-NEXT: vadd.i32 q1, q1, r4 660; CHECK-NEXT: vmov r0, r1, d6 661; CHECK-NEXT: vadd.i32 q3, q0, r4 662; CHECK-NEXT: vmov r6, r7, d5 663; CHECK-NEXT: ldrb.w lr, [r3] 664; CHECK-NEXT: ldrb r3, [r1] 665; CHECK-NEXT: ldrb.w r12, 
[r2] 666; CHECK-NEXT: ldrb r1, [r6] 667; CHECK-NEXT: vmov r2, r6, d6 668; CHECK-NEXT: ldrb r5, [r0] 669; CHECK-NEXT: ldrb r0, [r7] 670; CHECK-NEXT: ldrb r2, [r2] 671; CHECK-NEXT: vmov.8 q0[0], r2 672; CHECK-NEXT: ldrb r2, [r6] 673; CHECK-NEXT: vmov.8 q0[1], r2 674; CHECK-NEXT: vmov r2, r6, d7 675; CHECK-NEXT: ldrb r2, [r2] 676; CHECK-NEXT: ldrb r6, [r6] 677; CHECK-NEXT: vmov.8 q0[2], r2 678; CHECK-NEXT: vmov r2, r4, d2 679; CHECK-NEXT: vmov.8 q0[3], r6 680; CHECK-NEXT: ldrb r2, [r2] 681; CHECK-NEXT: ldrb r4, [r4] 682; CHECK-NEXT: vmov.8 q0[4], r2 683; CHECK-NEXT: vmov.8 q0[5], r4 684; CHECK-NEXT: vmov r2, r4, d3 685; CHECK-NEXT: ldrb r2, [r2] 686; CHECK-NEXT: ldrb r4, [r4] 687; CHECK-NEXT: vmov.8 q0[6], r2 688; CHECK-NEXT: vmov.8 q0[7], r4 689; CHECK-NEXT: vmov r2, r4, d4 690; CHECK-NEXT: ldrb r2, [r2] 691; CHECK-NEXT: ldrb r4, [r4] 692; CHECK-NEXT: vmov.8 q0[8], r2 693; CHECK-NEXT: vmov.8 q0[9], r4 694; CHECK-NEXT: vmov.8 q0[10], r1 695; CHECK-NEXT: vmov.8 q0[11], r0 696; CHECK-NEXT: vmov.8 q0[12], r5 697; CHECK-NEXT: vmov.8 q0[13], r3 698; CHECK-NEXT: vmov.8 q0[14], lr 699; CHECK-NEXT: vmov.8 q0[15], r12 700; CHECK-NEXT: pop {r4, r5, r6, r7, pc} 701entry: 702 %ptrs2 = getelementptr inbounds i8, <16 x ptr> %base, i32 256 703 %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %ptrs2, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef) 704 ret <16 x i8> %gather 705} 706 707 708define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_biggep6(ptr %base) { 709; CHECK-LABEL: unscaled_v16i8_i8_biggep6: 710; CHECK: @ %bb.0: @ %entry 711; CHECK-NEXT: .save {r4, r5, r6, r7, lr} 712; CHECK-NEXT: push {r4, r5, r6, r7, lr} 713; CHECK-NEXT: adr r1, .LCPI14_0 714; CHECK-NEXT: adr r4, .LCPI14_1 715; CHECK-NEXT: vldrw.u32 q0, [r1] 716; CHECK-NEXT: adr r7, .LCPI14_3 717; CHECK-NEXT: vadd.i32 q0, q0, r0 718; CHECK-NEXT: vmov r1, r2, d1 719; 
CHECK-NEXT: vmov r3, r5, d0 720; CHECK-NEXT: vldrw.u32 q0, [r4] 721; CHECK-NEXT: vadd.i32 q1, q0, r0 722; CHECK-NEXT: vmov r4, r6, d3 723; CHECK-NEXT: ldrb.w lr, [r1] 724; CHECK-NEXT: adr r1, .LCPI14_2 725; CHECK-NEXT: vldrw.u32 q0, [r1] 726; CHECK-NEXT: ldrb.w r12, [r2] 727; CHECK-NEXT: ldrb r1, [r5] 728; CHECK-NEXT: vadd.i32 q2, q0, r0 729; CHECK-NEXT: ldrb r3, [r3] 730; CHECK-NEXT: ldrb r2, [r6] 731; CHECK-NEXT: vmov r5, r6, d4 732; CHECK-NEXT: ldrb r4, [r4] 733; CHECK-NEXT: ldrb r5, [r5] 734; CHECK-NEXT: vmov.8 q0[0], r5 735; CHECK-NEXT: ldrb r5, [r6] 736; CHECK-NEXT: vmov.8 q0[1], r5 737; CHECK-NEXT: vmov r5, r6, d5 738; CHECK-NEXT: vldrw.u32 q2, [r7] 739; CHECK-NEXT: vadd.i32 q2, q2, r0 740; CHECK-NEXT: ldrb r0, [r5] 741; CHECK-NEXT: ldrb r6, [r6] 742; CHECK-NEXT: vmov.8 q0[2], r0 743; CHECK-NEXT: vmov r0, r5, d4 744; CHECK-NEXT: vmov.8 q0[3], r6 745; CHECK-NEXT: ldrb r0, [r0] 746; CHECK-NEXT: ldrb r5, [r5] 747; CHECK-NEXT: vmov.8 q0[4], r0 748; CHECK-NEXT: vmov.8 q0[5], r5 749; CHECK-NEXT: vmov r0, r5, d5 750; CHECK-NEXT: ldrb r0, [r0] 751; CHECK-NEXT: ldrb r5, [r5] 752; CHECK-NEXT: vmov.8 q0[6], r0 753; CHECK-NEXT: vmov.8 q0[7], r5 754; CHECK-NEXT: vmov r0, r5, d2 755; CHECK-NEXT: ldrb r0, [r0] 756; CHECK-NEXT: ldrb r5, [r5] 757; CHECK-NEXT: vmov.8 q0[8], r0 758; CHECK-NEXT: vmov.8 q0[9], r5 759; CHECK-NEXT: vmov.8 q0[10], r4 760; CHECK-NEXT: vmov.8 q0[11], r2 761; CHECK-NEXT: vmov.8 q0[12], r3 762; CHECK-NEXT: vmov.8 q0[13], r1 763; CHECK-NEXT: vmov.8 q0[14], lr 764; CHECK-NEXT: vmov.8 q0[15], r12 765; CHECK-NEXT: pop {r4, r5, r6, r7, pc} 766; CHECK-NEXT: .p2align 4 767; CHECK-NEXT: @ %bb.1: 768; CHECK-NEXT: .LCPI14_0: 769; CHECK-NEXT: .long 37 @ 0x25 770; CHECK-NEXT: .long 40 @ 0x28 771; CHECK-NEXT: .long 43 @ 0x2b 772; CHECK-NEXT: .long 46 @ 0x2e 773; CHECK-NEXT: .LCPI14_1: 774; CHECK-NEXT: .long 257 @ 0x101 775; CHECK-NEXT: .long 28 @ 0x1c 776; CHECK-NEXT: .long 31 @ 0x1f 777; CHECK-NEXT: .long 34 @ 0x22 778; CHECK-NEXT: .LCPI14_2: 779; CHECK-NEXT: 
.long 1 @ 0x1 780; CHECK-NEXT: .long 4 @ 0x4 781; CHECK-NEXT: .long 7 @ 0x7 782; CHECK-NEXT: .long 10 @ 0xa 783; CHECK-NEXT: .LCPI14_3: 784; CHECK-NEXT: .long 13 @ 0xd 785; CHECK-NEXT: .long 16 @ 0x10 786; CHECK-NEXT: .long 19 @ 0x13 787; CHECK-NEXT: .long 22 @ 0x16 788entry: 789 %ptrs = getelementptr inbounds i8, ptr %base, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 256, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45> 790 %ptrs2 = getelementptr inbounds i8, <16 x ptr> %ptrs, i32 1 791 %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %ptrs2, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef) 792 ret <16 x i8> %gather 793} 794 795 796define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_biggep7(ptr %base) { 797; CHECK-LABEL: unscaled_v16i8_i8_biggep7: 798; CHECK: @ %bb.0: @ %entry 799; CHECK-NEXT: .save {r4, r5, r6, r7, lr} 800; CHECK-NEXT: push {r4, r5, r6, r7, lr} 801; CHECK-NEXT: adr r1, .LCPI15_0 802; CHECK-NEXT: adr r4, .LCPI15_1 803; CHECK-NEXT: vldrw.u32 q0, [r1] 804; CHECK-NEXT: adr r7, .LCPI15_3 805; CHECK-NEXT: vadd.i32 q0, q0, r0 806; CHECK-NEXT: vmov r1, r2, d1 807; CHECK-NEXT: vmov r3, r5, d0 808; CHECK-NEXT: vldrw.u32 q0, [r4] 809; CHECK-NEXT: vadd.i32 q1, q0, r0 810; CHECK-NEXT: vmov r4, r6, d3 811; CHECK-NEXT: ldrb.w lr, [r1] 812; CHECK-NEXT: adr r1, .LCPI15_2 813; CHECK-NEXT: vldrw.u32 q0, [r1] 814; CHECK-NEXT: ldrb.w r12, [r2] 815; CHECK-NEXT: ldrb r1, [r5] 816; CHECK-NEXT: vadd.i32 q2, q0, r0 817; CHECK-NEXT: ldrb r3, [r3] 818; CHECK-NEXT: ldrb r2, [r6] 819; CHECK-NEXT: vmov r5, r6, d4 820; CHECK-NEXT: ldrb r4, [r4] 821; CHECK-NEXT: ldrb r5, [r5] 822; CHECK-NEXT: vmov.8 q0[0], r5 823; CHECK-NEXT: ldrb r5, [r6] 824; CHECK-NEXT: vmov.8 q0[1], r5 825; CHECK-NEXT: vmov r5, r6, d5 826; CHECK-NEXT: vldrw.u32 q2, [r7] 827; CHECK-NEXT: vadd.i32 q2, q2, r0 828; CHECK-NEXT: 
ldrb r0, [r5] 829; CHECK-NEXT: ldrb r6, [r6] 830; CHECK-NEXT: vmov.8 q0[2], r0 831; CHECK-NEXT: vmov r0, r5, d4 832; CHECK-NEXT: vmov.8 q0[3], r6 833; CHECK-NEXT: ldrb r0, [r0] 834; CHECK-NEXT: ldrb r5, [r5] 835; CHECK-NEXT: vmov.8 q0[4], r0 836; CHECK-NEXT: vmov.8 q0[5], r5 837; CHECK-NEXT: vmov r0, r5, d5 838; CHECK-NEXT: ldrb r0, [r0] 839; CHECK-NEXT: ldrb r5, [r5] 840; CHECK-NEXT: vmov.8 q0[6], r0 841; CHECK-NEXT: vmov.8 q0[7], r5 842; CHECK-NEXT: vmov r0, r5, d2 843; CHECK-NEXT: ldrb r0, [r0] 844; CHECK-NEXT: ldrb r5, [r5] 845; CHECK-NEXT: vmov.8 q0[8], r0 846; CHECK-NEXT: vmov.8 q0[9], r5 847; CHECK-NEXT: vmov.8 q0[10], r4 848; CHECK-NEXT: vmov.8 q0[11], r2 849; CHECK-NEXT: vmov.8 q0[12], r3 850; CHECK-NEXT: vmov.8 q0[13], r1 851; CHECK-NEXT: vmov.8 q0[14], lr 852; CHECK-NEXT: vmov.8 q0[15], r12 853; CHECK-NEXT: pop {r4, r5, r6, r7, pc} 854; CHECK-NEXT: .p2align 4 855; CHECK-NEXT: @ %bb.1: 856; CHECK-NEXT: .LCPI15_0: 857; CHECK-NEXT: .long 236 @ 0xec 858; CHECK-NEXT: .long 239 @ 0xef 859; CHECK-NEXT: .long 242 @ 0xf2 860; CHECK-NEXT: .long 245 @ 0xf5 861; CHECK-NEXT: .LCPI15_1: 862; CHECK-NEXT: .long 224 @ 0xe0 863; CHECK-NEXT: .long 227 @ 0xe3 864; CHECK-NEXT: .long 230 @ 0xe6 865; CHECK-NEXT: .long 233 @ 0xe9 866; CHECK-NEXT: .LCPI15_2: 867; CHECK-NEXT: .long 300 @ 0x12c 868; CHECK-NEXT: .long 203 @ 0xcb 869; CHECK-NEXT: .long 206 @ 0xce 870; CHECK-NEXT: .long 209 @ 0xd1 871; CHECK-NEXT: .LCPI15_3: 872; CHECK-NEXT: .long 212 @ 0xd4 873; CHECK-NEXT: .long 215 @ 0xd7 874; CHECK-NEXT: .long 218 @ 0xda 875; CHECK-NEXT: .long 221 @ 0xdd 876entry: 877 %ptrs = getelementptr inbounds i8, ptr %base, <16 x i32> <i32 100, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45> 878 %ptrs2 = getelementptr inbounds i8, <16 x ptr> %ptrs, i32 200 879 %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %ptrs2, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, 
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef) 880 ret <16 x i8> %gather 881} 882 883
; unscaled_v16i8_i8_2 uses the loaded <16 x i8> offsets directly as GEP
; indices, without a zext. The expected output sign-extends them with
; vldrb.s32, adds the base pointer, and loads each lane with a scalar ldrb.
884define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_2(ptr %base, ptr %offptr) { 885; CHECK-LABEL: unscaled_v16i8_i8_2: 886; CHECK: @ %bb.0: @ %entry 887; CHECK-NEXT: .save {r4, r5, r6, r7, lr} 888; CHECK-NEXT: push {r4, r5, r6, r7, lr} 889; CHECK-NEXT: vldrb.s32 q0, [r1, #12] 890; CHECK-NEXT: vadd.i32 q0, q0, r0 891; CHECK-NEXT: vmov r2, r3, d1 892; CHECK-NEXT: vmov r4, r5, d0 893; CHECK-NEXT: vldrb.s32 q0, [r1] 894; CHECK-NEXT: vadd.i32 q2, q0, r0 895; CHECK-NEXT: vldrb.s32 q0, [r1, #8] 896; CHECK-NEXT: vadd.i32 q1, q0, r0 897; CHECK-NEXT: ldrb.w r12, [r2] 898; CHECK-NEXT: ldrb.w lr, [r3] 899; CHECK-NEXT: ldrb r3, [r4] 900; CHECK-NEXT: ldrb r2, [r5] 901; CHECK-NEXT: vmov r4, r5, d4 902; CHECK-NEXT: ldrb r4, [r4] 903; CHECK-NEXT: ldrb r5, [r5] 904; CHECK-NEXT: vmov.8 q0[0], r4 905; CHECK-NEXT: vmov r4, r6, d3 906; CHECK-NEXT: vmov.8 q0[1], r5 907; CHECK-NEXT: ldrb r5, [r4] 908; CHECK-NEXT: ldrb r4, [r6] 909; CHECK-NEXT: vmov r6, r7, d5 910; CHECK-NEXT: vldrb.s32 q2, [r1, #4] 911; CHECK-NEXT: vadd.i32 q2, q2, r0 912; CHECK-NEXT: ldrb r0, [r6] 913; CHECK-NEXT: ldrb r7, [r7] 914; CHECK-NEXT: vmov.8 q0[2], r0 915; CHECK-NEXT: vmov r0, r1, d4 916; CHECK-NEXT: vmov.8 q0[3], r7 917; CHECK-NEXT: ldrb r0, [r0] 918; CHECK-NEXT: ldrb r1, [r1] 919; CHECK-NEXT: vmov.8 q0[4], r0 920; CHECK-NEXT: vmov.8 q0[5], r1 921; CHECK-NEXT: vmov r0, r1, d5 922; CHECK-NEXT: ldrb r0, [r0] 923; CHECK-NEXT: ldrb r1, [r1] 924; CHECK-NEXT: vmov.8 q0[6], r0 925; CHECK-NEXT: vmov.8 q0[7], r1 926; CHECK-NEXT: vmov r0, r1, d2 927; CHECK-NEXT: ldrb r0, [r0] 928; CHECK-NEXT: ldrb r1, [r1] 929; CHECK-NEXT: vmov.8 q0[8], r0 930; CHECK-NEXT: vmov.8 q0[9], r1 931; CHECK-NEXT: vmov.8 q0[10], r5 932; CHECK-NEXT: vmov.8 q0[11], r4 933; CHECK-NEXT: vmov.8 q0[12], r3 934; CHECK-NEXT: vmov.8 q0[13], r2 935; CHECK-NEXT: vmov.8 q0[14], r12 936; CHECK-NEXT: vmov.8 q0[15], lr 937; 
CHECK-NEXT: pop {r4, r5, r6, r7, pc} 938entry: 939 %offs = load <16 x i8>, ptr %offptr, align 1 940 %ptrs = getelementptr inbounds i8, ptr %base, <16 x i8> %offs 941 %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %ptrs, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef) 942 ret <16 x i8> %gather 943} 944 945
; unscaled_v16i8_i8_3 gathers 16 bytes from %base plus the constant i8 offsets
; 0, 3, 6, ..., 45. The expected output loads the offsets from a constant pool
; of .byte values and performs one vldrb.u8 gather.
946define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_3(ptr %base, ptr %offptr) { 947; CHECK-LABEL: unscaled_v16i8_i8_3: 948; CHECK: @ %bb.0: @ %entry 949; CHECK-NEXT: adr r1, .LCPI17_0 950; CHECK-NEXT: vldrw.u32 q1, [r1] 951; CHECK-NEXT: vldrb.u8 q0, [r0, q1] 952; CHECK-NEXT: bx lr 953; CHECK-NEXT: .p2align 4 954; CHECK-NEXT: @ %bb.1: 955; CHECK-NEXT: .LCPI17_0: 956; CHECK-NEXT: .byte 0 @ 0x0 957; CHECK-NEXT: .byte 3 @ 0x3 958; CHECK-NEXT: .byte 6 @ 0x6 959; CHECK-NEXT: .byte 9 @ 0x9 960; CHECK-NEXT: .byte 12 @ 0xc 961; CHECK-NEXT: .byte 15 @ 0xf 962; CHECK-NEXT: .byte 18 @ 0x12 963; CHECK-NEXT: .byte 21 @ 0x15 964; CHECK-NEXT: .byte 24 @ 0x18 965; CHECK-NEXT: .byte 27 @ 0x1b 966; CHECK-NEXT: .byte 30 @ 0x1e 967; CHECK-NEXT: .byte 33 @ 0x21 968; CHECK-NEXT: .byte 36 @ 0x24 969; CHECK-NEXT: .byte 39 @ 0x27 970; CHECK-NEXT: .byte 42 @ 0x2a 971; CHECK-NEXT: .byte 45 @ 0x2d 972entry: 973 %ptrs = getelementptr inbounds i8, ptr %base, <16 x i8> <i8 0, i8 3, i8 6, i8 9, i8 12, i8 15, i8 18, i8 21, i8 24, i8 27, i8 30, i8 33, i8 36, i8 39, i8 42, i8 45> 974 %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %ptrs, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef) 975 ret <16 x i8> %gather 976} 977 978define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_basei16(ptr %base, ptr %offptr) { 979; CHECK-LABEL: unscaled_v16i8_basei16: 980; CHECK: @ %bb.0: @ %entry 981; CHECK-NEXT: .save 
{r4, r5, r6, r7, lr} 982; CHECK-NEXT: push {r4, r5, r6, r7, lr} 983; CHECK-NEXT: vldrb.u32 q0, [r1, #12] 984; CHECK-NEXT: vshl.i32 q0, q0, #1 985; CHECK-NEXT: vadd.i32 q0, q0, r0 986; CHECK-NEXT: vmov r2, r3, d1 987; CHECK-NEXT: vmov r4, r5, d0 988; CHECK-NEXT: vldrb.u32 q0, [r1] 989; CHECK-NEXT: vshl.i32 q0, q0, #1 990; CHECK-NEXT: vadd.i32 q2, q0, r0 991; CHECK-NEXT: vldrb.u32 q0, [r1, #8] 992; CHECK-NEXT: vshl.i32 q0, q0, #1 993; CHECK-NEXT: vadd.i32 q1, q0, r0 994; CHECK-NEXT: ldrb.w r12, [r2] 995; CHECK-NEXT: ldrb.w lr, [r3] 996; CHECK-NEXT: ldrb r3, [r4] 997; CHECK-NEXT: ldrb r2, [r5] 998; CHECK-NEXT: vmov r4, r5, d4 999; CHECK-NEXT: ldrb r4, [r4] 1000; CHECK-NEXT: ldrb r5, [r5] 1001; CHECK-NEXT: vmov.8 q0[0], r4 1002; CHECK-NEXT: vmov r4, r6, d3 1003; CHECK-NEXT: vmov.8 q0[1], r5 1004; CHECK-NEXT: ldrb r5, [r4] 1005; CHECK-NEXT: ldrb r4, [r6] 1006; CHECK-NEXT: vmov r6, r7, d5 1007; CHECK-NEXT: vldrb.u32 q2, [r1, #4] 1008; CHECK-NEXT: vshl.i32 q2, q2, #1 1009; CHECK-NEXT: vadd.i32 q2, q2, r0 1010; CHECK-NEXT: ldrb r0, [r6] 1011; CHECK-NEXT: ldrb r7, [r7] 1012; CHECK-NEXT: vmov.8 q0[2], r0 1013; CHECK-NEXT: vmov r0, r1, d4 1014; CHECK-NEXT: vmov.8 q0[3], r7 1015; CHECK-NEXT: ldrb r0, [r0] 1016; CHECK-NEXT: ldrb r1, [r1] 1017; CHECK-NEXT: vmov.8 q0[4], r0 1018; CHECK-NEXT: vmov.8 q0[5], r1 1019; CHECK-NEXT: vmov r0, r1, d5 1020; CHECK-NEXT: ldrb r0, [r0] 1021; CHECK-NEXT: ldrb r1, [r1] 1022; CHECK-NEXT: vmov.8 q0[6], r0 1023; CHECK-NEXT: vmov.8 q0[7], r1 1024; CHECK-NEXT: vmov r0, r1, d2 1025; CHECK-NEXT: ldrb r0, [r0] 1026; CHECK-NEXT: ldrb r1, [r1] 1027; CHECK-NEXT: vmov.8 q0[8], r0 1028; CHECK-NEXT: vmov.8 q0[9], r1 1029; CHECK-NEXT: vmov.8 q0[10], r5 1030; CHECK-NEXT: vmov.8 q0[11], r4 1031; CHECK-NEXT: vmov.8 q0[12], r3 1032; CHECK-NEXT: vmov.8 q0[13], r2 1033; CHECK-NEXT: vmov.8 q0[14], r12 1034; CHECK-NEXT: vmov.8 q0[15], lr 1035; CHECK-NEXT: pop {r4, r5, r6, r7, pc} 1036entry: 1037 %offs = load <16 x i8>, ptr %offptr, align 1 1038 %offs.zext = zext <16 
x i8> %offs to <16 x i32> 1039 %ptrs = getelementptr inbounds i16, ptr %base, <16 x i32> %offs.zext 1040 %ptrs.cast = bitcast <16 x ptr> %ptrs to <16 x ptr> 1041 %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %ptrs.cast, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef) 1042 ret <16 x i8> %gather 1043} 1044
; unscaled_v16i8_i8_range adds a further constant 82 to the constant i8
; offsets 0..45, giving byte offsets 82..127. The expected output keeps the
; single vldrb.u8 gather, with those sums stored as .byte constants.
1045define arm_aapcs_vfpcc <16 x i8> @unscaled_v16i8_i8_range(ptr %base, ptr %offptr) { 1046; CHECK-LABEL: unscaled_v16i8_i8_range: 1047; CHECK: @ %bb.0: @ %entry 1048; CHECK-NEXT: adr r1, .LCPI19_0 1049; CHECK-NEXT: vldrw.u32 q1, [r1] 1050; CHECK-NEXT: vldrb.u8 q0, [r0, q1] 1051; CHECK-NEXT: bx lr 1052; CHECK-NEXT: .p2align 4 1053; CHECK-NEXT: @ %bb.1: 1054; CHECK-NEXT: .LCPI19_0: 1055; CHECK-NEXT: .byte 82 @ 0x52 1056; CHECK-NEXT: .byte 85 @ 0x55 1057; CHECK-NEXT: .byte 88 @ 0x58 1058; CHECK-NEXT: .byte 91 @ 0x5b 1059; CHECK-NEXT: .byte 94 @ 0x5e 1060; CHECK-NEXT: .byte 97 @ 0x61 1061; CHECK-NEXT: .byte 100 @ 0x64 1062; CHECK-NEXT: .byte 103 @ 0x67 1063; CHECK-NEXT: .byte 106 @ 0x6a 1064; CHECK-NEXT: .byte 109 @ 0x6d 1065; CHECK-NEXT: .byte 112 @ 0x70 1066; CHECK-NEXT: .byte 115 @ 0x73 1067; CHECK-NEXT: .byte 118 @ 0x76 1068; CHECK-NEXT: .byte 121 @ 0x79 1069; CHECK-NEXT: .byte 124 @ 0x7c 1070; CHECK-NEXT: .byte 127 @ 0x7f 1071entry: 1072 %ptrs = getelementptr inbounds i8, ptr %base, <16 x i8> <i8 0, i8 3, i8 6, i8 9, i8 12, i8 15, i8 18, i8 21, i8 24, i8 27, i8 30, i8 33, i8 36, i8 39, i8 42, i8 45> 1073 %ptrs2 = getelementptr inbounds i8, <16 x ptr> %ptrs, i32 82 1074 %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %ptrs2, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef) 1075 ret <16 x i8> %gather 1076} 1077 1078define arm_aapcs_vfpcc <16 x i8> 
@unscaled_v16i8_i8_rangebad(ptr %base, ptr %offptr) { 1079; CHECK-LABEL: unscaled_v16i8_i8_rangebad: 1080; CHECK: @ %bb.0: @ %entry 1081; CHECK-NEXT: .save {r4, r5, r6, r7, lr} 1082; CHECK-NEXT: push {r4, r5, r6, r7, lr} 1083; CHECK-NEXT: adr r1, .LCPI20_0 1084; CHECK-NEXT: adr r4, .LCPI20_1 1085; CHECK-NEXT: vldrw.u32 q0, [r1] 1086; CHECK-NEXT: adr r7, .LCPI20_3 1087; CHECK-NEXT: vadd.i32 q0, q0, r0 1088; CHECK-NEXT: vmov r1, r2, d1 1089; CHECK-NEXT: vmov r3, r5, d0 1090; CHECK-NEXT: vldrw.u32 q0, [r4] 1091; CHECK-NEXT: vadd.i32 q1, q0, r0 1092; CHECK-NEXT: vmov r4, r6, d3 1093; CHECK-NEXT: ldrb.w lr, [r1] 1094; CHECK-NEXT: adr r1, .LCPI20_2 1095; CHECK-NEXT: vldrw.u32 q0, [r1] 1096; CHECK-NEXT: ldrb.w r12, [r2] 1097; CHECK-NEXT: ldrb r1, [r5] 1098; CHECK-NEXT: vadd.i32 q2, q0, r0 1099; CHECK-NEXT: ldrb r3, [r3] 1100; CHECK-NEXT: ldrb r2, [r6] 1101; CHECK-NEXT: vmov r5, r6, d4 1102; CHECK-NEXT: ldrb r4, [r4] 1103; CHECK-NEXT: ldrb r5, [r5] 1104; CHECK-NEXT: vmov.8 q0[0], r5 1105; CHECK-NEXT: ldrb r5, [r6] 1106; CHECK-NEXT: vmov.8 q0[1], r5 1107; CHECK-NEXT: vmov r5, r6, d5 1108; CHECK-NEXT: vldrw.u32 q2, [r7] 1109; CHECK-NEXT: vadd.i32 q2, q2, r0 1110; CHECK-NEXT: ldrb r0, [r5] 1111; CHECK-NEXT: ldrb r6, [r6] 1112; CHECK-NEXT: vmov.8 q0[2], r0 1113; CHECK-NEXT: vmov r0, r5, d4 1114; CHECK-NEXT: vmov.8 q0[3], r6 1115; CHECK-NEXT: ldrb r0, [r0] 1116; CHECK-NEXT: ldrb r5, [r5] 1117; CHECK-NEXT: vmov.8 q0[4], r0 1118; CHECK-NEXT: vmov.8 q0[5], r5 1119; CHECK-NEXT: vmov r0, r5, d5 1120; CHECK-NEXT: ldrb r0, [r0] 1121; CHECK-NEXT: ldrb r5, [r5] 1122; CHECK-NEXT: vmov.8 q0[6], r0 1123; CHECK-NEXT: vmov.8 q0[7], r5 1124; CHECK-NEXT: vmov r0, r5, d2 1125; CHECK-NEXT: ldrb r0, [r0] 1126; CHECK-NEXT: ldrb r5, [r5] 1127; CHECK-NEXT: vmov.8 q0[8], r0 1128; CHECK-NEXT: vmov.8 q0[9], r5 1129; CHECK-NEXT: vmov.8 q0[10], r4 1130; CHECK-NEXT: vmov.8 q0[11], r2 1131; CHECK-NEXT: vmov.8 q0[12], r3 1132; CHECK-NEXT: vmov.8 q0[13], r1 1133; CHECK-NEXT: vmov.8 q0[14], lr 1134; 
CHECK-NEXT: vmov.8 q0[15], r12 1135; CHECK-NEXT: pop {r4, r5, r6, r7, pc} 1136; CHECK-NEXT: .p2align 4 1137; CHECK-NEXT: @ %bb.1: 1138; CHECK-NEXT: .LCPI20_0: 1139; CHECK-NEXT: .long 119 @ 0x77 1140; CHECK-NEXT: .long 122 @ 0x7a 1141; CHECK-NEXT: .long 125 @ 0x7d 1142; CHECK-NEXT: .long 128 @ 0x80 1143; CHECK-NEXT: .LCPI20_1: 1144; CHECK-NEXT: .long 107 @ 0x6b 1145; CHECK-NEXT: .long 110 @ 0x6e 1146; CHECK-NEXT: .long 113 @ 0x71 1147; CHECK-NEXT: .long 116 @ 0x74 1148; CHECK-NEXT: .LCPI20_2: 1149; CHECK-NEXT: .long 83 @ 0x53 1150; CHECK-NEXT: .long 86 @ 0x56 1151; CHECK-NEXT: .long 89 @ 0x59 1152; CHECK-NEXT: .long 92 @ 0x5c 1153; CHECK-NEXT: .LCPI20_3: 1154; CHECK-NEXT: .long 95 @ 0x5f 1155; CHECK-NEXT: .long 98 @ 0x62 1156; CHECK-NEXT: .long 101 @ 0x65 1157; CHECK-NEXT: .long 104 @ 0x68 1158entry: 1159 %ptrs = getelementptr inbounds i8, ptr %base, <16 x i8> <i8 0, i8 3, i8 6, i8 9, i8 12, i8 15, i8 18, i8 21, i8 24, i8 27, i8 30, i8 33, i8 36, i8 39, i8 42, i8 45> 1160 %ptrs2 = getelementptr inbounds i8, <16 x ptr> %ptrs, i32 83 1161 %gather = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %ptrs2, i32 1, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef) 1162 ret <16 x i8> %gather 1163} 1164 1165declare <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i8>) 1166declare <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i8>) 1167declare <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i8>) 1168