; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedgatscat=false %s -o - | FileCheck --check-prefix NOGATSCAT %s
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=-mve %s -o - | FileCheck --check-prefix NOMVE %s

; Masked gather/scatter with an all-true mask and byte ("unscaled") offsets
; from a scalar base pointer, compiled in two configurations:
;   * MVE enabled with -enable-arm-maskedgatscat=false: the expected code
;     contains no MVE gather/scatter instruction; the addresses are formed
;     in a vector register and every lane is accessed with a scalar ldr/str.
;   * MVE disabled: the expected code uses core registers only.
; The assertion lines are autogenerated; after editing the IR, regenerate
; them with utils/update_llc_test_checks.py rather than editing by hand.

; Gather four i32 values from %base + offs[i], where the four i32 byte
; offsets are loaded from %offptr. The passthru operand is poison because
; the mask is all-true, so no passthru lane is ever selected.
define arm_aapcs_vfpcc <4 x i32> @unscaled_i32_i32_gather(ptr %base, ptr %offptr) {
; NOGATSCAT-LABEL: unscaled_i32_i32_gather:
; NOGATSCAT:       @ %bb.0: @ %entry
; NOGATSCAT-NEXT:    vldrw.u32 q0, [r1]
; NOGATSCAT-NEXT:    vadd.i32 q0, q0, r0
; NOGATSCAT-NEXT:    vmov r0, r1, d1
; NOGATSCAT-NEXT:    vmov r2, r3, d0
; NOGATSCAT-NEXT:    ldr r0, [r0]
; NOGATSCAT-NEXT:    ldr r2, [r2]
; NOGATSCAT-NEXT:    ldr r1, [r1]
; NOGATSCAT-NEXT:    ldr r3, [r3]
; NOGATSCAT-NEXT:    vmov q0[2], q0[0], r2, r0
; NOGATSCAT-NEXT:    vmov q0[3], q0[1], r3, r1
; NOGATSCAT-NEXT:    bx lr
;
; NOMVE-LABEL: unscaled_i32_i32_gather:
; NOMVE:       @ %bb.0: @ %entry
; NOMVE-NEXT:    .save {r4, lr}
; NOMVE-NEXT:    push {r4, lr}
; NOMVE-NEXT:    ldm.w r1, {r2, r3, lr}
; NOMVE-NEXT:    ldr r4, [r1, #12]
; NOMVE-NEXT:    ldr.w r12, [r0, r2]
; NOMVE-NEXT:    ldr r1, [r0, r3]
; NOMVE-NEXT:    ldr.w r2, [r0, lr]
; NOMVE-NEXT:    ldr r3, [r0, r4]
; NOMVE-NEXT:    mov r0, r12
; NOMVE-NEXT:    pop {r4, pc}

entry:
  %offs = load <4 x i32>, ptr %offptr, align 4
  ; The i8 element type makes the offsets byte offsets (no scaling).
  %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs
  %gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
  ret <4 x i32> %gather
}

declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>)


; Scatter the four i32 lanes of %input to %base + zext(offs[i]), where the
; four i8 byte offsets are loaded from %offptr and zero-extended to i32.
define arm_aapcs_vfpcc void @unscaled_i32_i8_scatter(ptr %base, ptr %offptr, <4 x i32> %input) {
; NOGATSCAT-LABEL: unscaled_i32_i8_scatter:
; NOGATSCAT:       @ %bb.0: @ %entry
; NOGATSCAT-NEXT:    .save {r4, r5, r7, lr}
; NOGATSCAT-NEXT:    push {r4, r5, r7, lr}
; NOGATSCAT-NEXT:    vldrb.u32 q1, [r1]
; NOGATSCAT-NEXT:    vmov r1, r3, d0
; NOGATSCAT-NEXT:    vmov r4, r5, d1
; NOGATSCAT-NEXT:    vadd.i32 q1, q1, r0
; NOGATSCAT-NEXT:    vmov r0, r12, d2
; NOGATSCAT-NEXT:    vmov r2, lr, d3
; NOGATSCAT-NEXT:    str r1, [r0]
; NOGATSCAT-NEXT:    str.w r3, [r12]
; NOGATSCAT-NEXT:    str r4, [r2]
; NOGATSCAT-NEXT:    str.w r5, [lr]
; NOGATSCAT-NEXT:    pop {r4, r5, r7, pc}
;
; NOMVE-LABEL: unscaled_i32_i8_scatter:
; NOMVE:       @ %bb.0: @ %entry
; NOMVE-NEXT:    .save {r4, lr}
; NOMVE-NEXT:    push {r4, lr}
; NOMVE-NEXT:    ldrb.w r12, [r1]
; NOMVE-NEXT:    ldrb.w lr, [r1, #1]
; NOMVE-NEXT:    ldrb r4, [r1, #2]
; NOMVE-NEXT:    ldrb r1, [r1, #3]
; NOMVE-NEXT:    str.w r2, [r0, r12]
; NOMVE-NEXT:    ldr r2, [sp, #8]
; NOMVE-NEXT:    str.w r3, [r0, lr]
; NOMVE-NEXT:    str r2, [r0, r4]
; NOMVE-NEXT:    ldr r2, [sp, #12]
; NOMVE-NEXT:    str r2, [r0, r1]
; NOMVE-NEXT:    pop {r4, pc}

entry:
  %offs = load <4 x i8>, ptr %offptr, align 1
  %offs.zext = zext <4 x i8> %offs to <4 x i32>
  ; The i8 element type makes the offsets byte offsets (no scaling).
  %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> %offs.zext
  call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %input, <4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}

declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32, <4 x i1>)