; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve %s -o - | FileCheck %s

; Codegen expectations for MVE when a vector add or sub feeds either a
; shift-right-by-one or a divide-by-two. Every function below pins the exact
; instruction sequence that llc is expected to emit for one combination of
; {add, sub} x {ashr, lshr, sdiv, udiv} x {v16i8, v8i16, v4i32}.
; In each group only the v4i32 variant carries the nsw flag on the add/sub.

; ---- add, then ashr by 1 ----
; v16i8 / v8i16 (no wrap flags): expected as a separate vadd and vshr.s.
; v4i32 (add nsw): expected as a single vhadd.s32.

define arm_aapcs_vfpcc <16 x i8> @add_ashr_v16i8(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: add_ashr_v16i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vadd.i8 q0, q0, q1
; CHECK-NEXT: vshr.s8 q0, q0, #1
; CHECK-NEXT: bx lr
entry:
  %sum = add <16 x i8> %src1, %src2
  %shr = ashr <16 x i8> %sum, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %shr
}

define arm_aapcs_vfpcc <8 x i16> @add_ashr_v8i16(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: add_ashr_v8i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vadd.i16 q0, q0, q1
; CHECK-NEXT: vshr.s16 q0, q0, #1
; CHECK-NEXT: bx lr
entry:
  %sum = add <8 x i16> %src1, %src2
  %shr = ashr <8 x i16> %sum, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %shr
}

define arm_aapcs_vfpcc <4 x i32> @add_ashr_v4i32(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: add_ashr_v4i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vhadd.s32 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %sum = add nsw <4 x i32> %src1, %src2
  %shr = ashr <4 x i32> %sum, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %shr
}

; ---- add, then lshr by 1 ----
; All three widths are expected as vadd followed by vshr.u, including the
; v4i32 variant whose add carries nsw.

define arm_aapcs_vfpcc <16 x i8> @add_lshr_v16i8(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: add_lshr_v16i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vadd.i8 q0, q0, q1
; CHECK-NEXT: vshr.u8 q0, q0, #1
; CHECK-NEXT: bx lr
entry:
  %sum = add <16 x i8> %src1, %src2
  %shr = lshr <16 x i8> %sum, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %shr
}

define arm_aapcs_vfpcc <8 x i16> @add_lshr_v8i16(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: add_lshr_v8i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vadd.i16 q0, q0, q1
; CHECK-NEXT: vshr.u16 q0, q0, #1
; CHECK-NEXT: bx lr
entry:
  %sum = add <8 x i16> %src1, %src2
  %shr = lshr <8 x i16> %sum, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %shr
}

define arm_aapcs_vfpcc <4 x i32> @add_lshr_v4i32(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: add_lshr_v4i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vadd.i32 q0, q0, q1
; CHECK-NEXT: vshr.u32 q0, q0, #1
; CHECK-NEXT: bx lr
entry:
  %sum = add nsw <4 x i32> %src1, %src2
  %shr = lshr <4 x i32> %sum, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %shr
}

; ---- sub, then ashr by 1 ----
; v16i8 / v8i16 (no wrap flags): expected as a separate vsub and vshr.s.
; v4i32 (sub nsw): expected as a single vhsub.s32.

define arm_aapcs_vfpcc <16 x i8> @sub_ashr_v16i8(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: sub_ashr_v16i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vsub.i8 q0, q0, q1
; CHECK-NEXT: vshr.s8 q0, q0, #1
; CHECK-NEXT: bx lr
entry:
  %diff = sub <16 x i8> %src1, %src2
  %shr = ashr <16 x i8> %diff, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %shr
}

define arm_aapcs_vfpcc <8 x i16> @sub_ashr_v8i16(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: sub_ashr_v8i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vsub.i16 q0, q0, q1
; CHECK-NEXT: vshr.s16 q0, q0, #1
; CHECK-NEXT: bx lr
entry:
  %diff = sub <8 x i16> %src1, %src2
  %shr = ashr <8 x i16> %diff, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %shr
}

define arm_aapcs_vfpcc <4 x i32> @sub_ashr_v4i32(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: sub_ashr_v4i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vhsub.s32 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %diff = sub nsw <4 x i32> %src1, %src2
  %shr = ashr <4 x i32> %diff, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %shr
}

; ---- sub, then lshr by 1 ----
; All three widths are expected as vsub followed by vshr.u, including the
; v4i32 variant whose sub carries nsw.

define arm_aapcs_vfpcc <16 x i8> @sub_lshr_v16i8(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: sub_lshr_v16i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vsub.i8 q0, q0, q1
; CHECK-NEXT: vshr.u8 q0, q0, #1
; CHECK-NEXT: bx lr
entry:
  %diff = sub <16 x i8> %src1, %src2
  %shr = lshr <16 x i8> %diff, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %shr
}

define arm_aapcs_vfpcc <8 x i16> @sub_lshr_v8i16(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: sub_lshr_v8i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vsub.i16 q0, q0, q1
; CHECK-NEXT: vshr.u16 q0, q0, #1
; CHECK-NEXT: bx lr
entry:
  %diff = sub <8 x i16> %src1, %src2
  %shr = lshr <8 x i16> %diff, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %shr
}

define arm_aapcs_vfpcc <4 x i32> @sub_lshr_v4i32(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: sub_lshr_v4i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vsub.i32 q0, q0, q1
; CHECK-NEXT: vshr.u32 q0, q0, #1
; CHECK-NEXT: bx lr
entry:
  %diff = sub nsw <4 x i32> %src1, %src2
  %shr = lshr <4 x i32> %diff, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %shr
}


; ---- add, then sdiv by 2 ----
; Expected for every width: the vadd, then vshr.u by (element width - 1) to
; isolate the sign bit, a vadd of that bit, and finally vshr.s by 1. No halving
; instruction is expected here, even for the v4i32 variant with nsw.

define arm_aapcs_vfpcc <16 x i8> @add_sdiv_v16i8(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: add_sdiv_v16i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vadd.i8 q0, q0, q1
; CHECK-NEXT: vshr.u8 q1, q0, #7
; CHECK-NEXT: vadd.i8 q0, q0, q1
; CHECK-NEXT: vshr.s8 q0, q0, #1
; CHECK-NEXT: bx lr
entry:
  %sum = add <16 x i8> %src1, %src2
  %quot = sdiv <16 x i8> %sum, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <16 x i8> %quot
}

define arm_aapcs_vfpcc <8 x i16> @add_sdiv_v8i16(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: add_sdiv_v8i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vadd.i16 q0, q0, q1
; CHECK-NEXT: vshr.u16 q1, q0, #15
; CHECK-NEXT: vadd.i16 q0, q0, q1
; CHECK-NEXT: vshr.s16 q0, q0, #1
; CHECK-NEXT: bx lr
entry:
  %sum = add <8 x i16> %src1, %src2
  %quot = sdiv <8 x i16> %sum, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <8 x i16> %quot
}

define arm_aapcs_vfpcc <4 x i32> @add_sdiv_v4i32(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: add_sdiv_v4i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vadd.i32 q0, q0, q1
; CHECK-NEXT: vshr.u32 q1, q0, #31
; CHECK-NEXT: vadd.i32 q0, q0, q1
; CHECK-NEXT: vshr.s32 q0, q0, #1
; CHECK-NEXT: bx lr
entry:
  %sum = add nsw <4 x i32> %src1, %src2
  %quot = sdiv <4 x i32> %sum, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %quot
}

; ---- add, then udiv by 2 ----
; Expected for every width: vadd followed by vshr.u by 1.

define arm_aapcs_vfpcc <16 x i8> @add_udiv_v16i8(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: add_udiv_v16i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vadd.i8 q0, q0, q1
; CHECK-NEXT: vshr.u8 q0, q0, #1
; CHECK-NEXT: bx lr
entry:
  %sum = add <16 x i8> %src1, %src2
  %quot = udiv <16 x i8> %sum, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <16 x i8> %quot
}

define arm_aapcs_vfpcc <8 x i16> @add_udiv_v8i16(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: add_udiv_v8i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vadd.i16 q0, q0, q1
; CHECK-NEXT: vshr.u16 q0, q0, #1
; CHECK-NEXT: bx lr
entry:
  %sum = add <8 x i16> %src1, %src2
  %quot = udiv <8 x i16> %sum, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <8 x i16> %quot
}

define arm_aapcs_vfpcc <4 x i32> @add_udiv_v4i32(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: add_udiv_v4i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vadd.i32 q0, q0, q1
; CHECK-NEXT: vshr.u32 q0, q0, #1
; CHECK-NEXT: bx lr
entry:
  %sum = add nsw <4 x i32> %src1, %src2
  %quot = udiv <4 x i32> %sum, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %quot
}

; ---- sub, then sdiv by 2 ----
; Same expected shape as the add + sdiv group, with vsub as the first
; instruction: vsub, vshr.u by (element width - 1), vadd, vshr.s by 1.

define arm_aapcs_vfpcc <16 x i8> @sub_sdiv_v16i8(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: sub_sdiv_v16i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vsub.i8 q0, q0, q1
; CHECK-NEXT: vshr.u8 q1, q0, #7
; CHECK-NEXT: vadd.i8 q0, q0, q1
; CHECK-NEXT: vshr.s8 q0, q0, #1
; CHECK-NEXT: bx lr
entry:
  %diff = sub <16 x i8> %src1, %src2
  %quot = sdiv <16 x i8> %diff, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <16 x i8> %quot
}

define arm_aapcs_vfpcc <8 x i16> @sub_sdiv_v8i16(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: sub_sdiv_v8i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vsub.i16 q0, q0, q1
; CHECK-NEXT: vshr.u16 q1, q0, #15
; CHECK-NEXT: vadd.i16 q0, q0, q1
; CHECK-NEXT: vshr.s16 q0, q0, #1
; CHECK-NEXT: bx lr
entry:
  %diff = sub <8 x i16> %src1, %src2
  %quot = sdiv <8 x i16> %diff, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <8 x i16> %quot
}

define arm_aapcs_vfpcc <4 x i32> @sub_sdiv_v4i32(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: sub_sdiv_v4i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vsub.i32 q0, q0, q1
; CHECK-NEXT: vshr.u32 q1, q0, #31
; CHECK-NEXT: vadd.i32 q0, q0, q1
; CHECK-NEXT: vshr.s32 q0, q0, #1
; CHECK-NEXT: bx lr
entry:
  %diff = sub nsw <4 x i32> %src1, %src2
  %quot = sdiv <4 x i32> %diff, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %quot
}

; ---- sub, then udiv by 2 ----
; Expected for every width: vsub followed by vshr.u by 1.

define arm_aapcs_vfpcc <16 x i8> @sub_udiv_v16i8(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: sub_udiv_v16i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vsub.i8 q0, q0, q1
; CHECK-NEXT: vshr.u8 q0, q0, #1
; CHECK-NEXT: bx lr
entry:
  %diff = sub <16 x i8> %src1, %src2
  %quot = udiv <16 x i8> %diff, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <16 x i8> %quot
}

define arm_aapcs_vfpcc <8 x i16> @sub_udiv_v8i16(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: sub_udiv_v8i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vsub.i16 q0, q0, q1
; CHECK-NEXT: vshr.u16 q0, q0, #1
; CHECK-NEXT: bx lr
entry:
  %diff = sub <8 x i16> %src1, %src2
  %quot = udiv <8 x i16> %diff, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <8 x i16> %quot
}

define arm_aapcs_vfpcc <4 x i32> @sub_udiv_v4i32(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: sub_udiv_v4i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vsub.i32 q0, q0, q1
; CHECK-NEXT: vshr.u32 q0, q0, #1
; CHECK-NEXT: bx lr
entry:
  %diff = sub nsw <4 x i32> %src1, %src2
  %quot = udiv <4 x i32> %diff, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %quot
}
