; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s

; Without no-wrap flags, (add x, y) >> 1 cannot be turned into vhadd: the
; intermediate add may wrap, so the plain add+shift sequence must be kept.
define arm_aapcs_vfpcc <16 x i8> @vhadds_v16i8(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vhadds_v16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    vshr.s8 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add <16 x i8> %x, %y
  %half = ashr <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <16 x i8> @vhaddu_v16i8(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vhaddu_v16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    vshr.u8 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add <16 x i8> %x, %y
  %half = lshr <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <8 x i16> @vhadds_v8i16(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vhadds_v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    vshr.s16 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add <8 x i16> %x, %y
  %half = ashr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <8 x i16> @vhaddu_v8i16(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vhaddu_v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    vshr.u16 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add <8 x i16> %x, %y
  %half = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <4 x i32> @vhadds_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vhadds_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    vshr.s32 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add <4 x i32> %x, %y
  %half = ashr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
define arm_aapcs_vfpcc <4 x i32> @vhaddu_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vhaddu_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    vshr.u32 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add <4 x i32> %x, %y
  %half = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
define arm_aapcs_vfpcc <16 x i8> @vhsubs_v16i8(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vhsubs_v16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i8 q0, q0, q1
; CHECK-NEXT:    vshr.s8 q0, q0, #1
; CHECK-NEXT:    bx lr
  %sub = sub <16 x i8> %x, %y
  %half = ashr <16 x i8> %sub, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <16 x i8> @vhsubu_v16i8(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vhsubu_v16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i8 q0, q0, q1
; CHECK-NEXT:    vshr.u8 q0, q0, #1
; CHECK-NEXT:    bx lr
  %sub = sub <16 x i8> %x, %y
  %half = lshr <16 x i8> %sub, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <8 x i16> @vhsubs_v8i16(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vhsubs_v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i16 q0, q0, q1
; CHECK-NEXT:    vshr.s16 q0, q0, #1
; CHECK-NEXT:    bx lr
  %sub = sub <8 x i16> %x, %y
  %half = ashr <8 x i16> %sub, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <8 x i16> @vhsubu_v8i16(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vhsubu_v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i16 q0, q0, q1
; CHECK-NEXT:    vshr.u16 q0, q0, #1
; CHECK-NEXT:    bx lr
  %sub = sub <8 x i16> %x, %y
  %half = lshr <8 x i16> %sub, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <4 x i32> @vhsubs_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vhsubs_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i32 q0, q0, q1
; CHECK-NEXT:    vshr.s32 q0, q0, #1
; CHECK-NEXT:    bx lr
  %sub = sub <4 x i32> %x, %y
  %half = ashr <4 x i32> %sub, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
define arm_aapcs_vfpcc <4 x i32> @vhsubu_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vhsubu_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i32 q0, q0, q1
; CHECK-NEXT:    vshr.u32 q0, q0, #1
; CHECK-NEXT:    bx lr
  %sub = sub <4 x i32> %x, %y
  %half = lshr <4 x i32> %sub, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}

; With nsw/nuw on the add/sub, the combined operation cannot wrap, so it can
; be selected as a single vhadd/vhsub instruction.
define arm_aapcs_vfpcc <16 x i8> @vhadds_v16i8_nw(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vhadds_v16i8_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vhadd.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add nsw <16 x i8> %x, %y
  %half = ashr <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <16 x i8> @vhaddu_v16i8_nw(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vhaddu_v16i8_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vhadd.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add nuw <16 x i8> %x, %y
  %half = lshr <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <8 x i16> @vhadds_v8i16_nw(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vhadds_v8i16_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vhadd.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add nsw <8 x i16> %x, %y
  %half = ashr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <8 x i16> @vhaddu_v8i16_nw(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vhaddu_v8i16_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vhadd.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add nuw <8 x i16> %x, %y
  %half = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <4 x i32> @vhadds_v4i32_nw(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vhadds_v4i32_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vhadd.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add nsw <4 x i32> %x, %y
  %half = ashr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
define arm_aapcs_vfpcc <4 x i32> @vhaddu_v4i32_nw(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vhaddu_v4i32_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vhadd.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add nuw <4 x i32> %x, %y
  %half = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
define arm_aapcs_vfpcc <16 x i8> @vhsubs_v16i8_nw(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vhsubs_v16i8_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vhsub.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %sub = sub nsw <16 x i8> %x, %y
  %half = ashr <16 x i8> %sub, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <16 x i8> @vhsubu_v16i8_nw(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vhsubu_v16i8_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vhsub.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %sub = sub nuw <16 x i8> %x, %y
  %half = lshr <16 x i8> %sub, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <8 x i16> @vhsubs_v8i16_nw(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vhsubs_v8i16_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vhsub.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %sub = sub nsw <8 x i16> %x, %y
  %half = ashr <8 x i16> %sub, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <8 x i16> @vhsubu_v8i16_nw(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vhsubu_v8i16_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vhsub.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %sub = sub nuw <8 x i16> %x, %y
  %half = lshr <8 x i16> %sub, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <4 x i32> @vhsubs_v4i32_nw(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vhsubs_v4i32_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vhsub.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %sub = sub nsw <4 x i32> %x, %y
  %half = ashr <4 x i32> %sub, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
define arm_aapcs_vfpcc <4 x i32> @vhsubu_v4i32_nw(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vhsubu_v4i32_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vhsub.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %sub = sub nuw <4 x i32> %x, %y
  %half = lshr <4 x i32> %sub, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
define arm_aapcs_vfpcc <16 x i8> @vrhadds_v16i8(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vrhadds_v16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vadd.i8 q0, q0, r0
; CHECK-NEXT:    vshr.s8 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add <16 x i8> %x, %y
  %round = add <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %half = ashr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <16 x i8> @vrhaddu_v16i8(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vrhaddu_v16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vadd.i8 q0, q0, r0
; CHECK-NEXT:    vshr.u8 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add <16 x i8> %x, %y
  %round = add <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %half = lshr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <8 x i16> @vrhadds_v8i16(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vrhadds_v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vadd.i16 q0, q0, r0
; CHECK-NEXT:    vshr.s16 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add <8 x i16> %x, %y
  %round = add <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %half = ashr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <8 x i16> @vrhaddu_v8i16(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vrhaddu_v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vadd.i16 q0, q0, r0
; CHECK-NEXT:    vshr.u16 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add <8 x i16> %x, %y
  %round = add <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %half = lshr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <4 x i32> @vrhadds_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vrhadds_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vshr.s32 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add <4 x i32> %x, %y
  %round = add <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  %half = ashr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
define arm_aapcs_vfpcc <4 x i32> @vrhaddu_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vrhaddu_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vshr.u32 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add <4 x i32> %x, %y
  %round = add <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  %half = lshr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
define arm_aapcs_vfpcc <16 x i8> @vrhadds_v16i8_nwop(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vrhadds_v16i8_nwop:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vadd.i8 q0, q0, r0
; CHECK-NEXT:    vshr.s8 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add nsw <16 x i8> %x, %y
  %round = add <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %half = ashr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <16 x i8> @vrhaddu_v16i8_nwop(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vrhaddu_v16i8_nwop:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vadd.i8 q0, q0, r0
; CHECK-NEXT:    vshr.u8 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add nuw <16 x i8> %x, %y
  %round = add <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %half = lshr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <8 x i16> @vrhadds_v8i16_nwop(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vrhadds_v8i16_nwop:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vadd.i16 q0, q0, r0
; CHECK-NEXT:    vshr.s16 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add nsw <8 x i16> %x, %y
  %round = add <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %half = ashr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <8 x i16> @vrhaddu_v8i16_nwop(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vrhaddu_v8i16_nwop:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vadd.i16 q0, q0, r0
; CHECK-NEXT:    vshr.u16 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add nuw <8 x i16> %x, %y
  %round = add <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %half = lshr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <4 x i32> @vrhadds_v4i32_nwop(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vrhadds_v4i32_nwop:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vshr.s32 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add nsw <4 x i32> %x, %y
  %round = add <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  %half = ashr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
define arm_aapcs_vfpcc <4 x i32> @vrhaddu_v4i32_nwop(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vrhaddu_v4i32_nwop:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vadd.i32 q0, q0, r0
; CHECK-NEXT:    vshr.u32 q0, q0, #1
; CHECK-NEXT:    bx lr
  %add = add nuw <4 x i32> %x, %y
  %round = add <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  %half = lshr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
define arm_aapcs_vfpcc <16 x i8> @vrhadds_v16i8_nwrnd(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vrhadds_v16i8_nwrnd:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vhadd.s8 q0, q0, r0
; CHECK-NEXT:    bx lr
  %add = add <16 x i8> %x, %y
  %round = add nsw <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %half = ashr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <16 x i8> @vrhaddu_v16i8_nwrnd(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vrhaddu_v16i8_nwrnd:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vhadd.u8 q0, q0, r0
; CHECK-NEXT:    bx lr
  %add = add <16 x i8> %x, %y
  %round = add nuw <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %half = lshr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <8 x i16> @vrhadds_v8i16_nwrnd(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vrhadds_v8i16_nwrnd:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vhadd.s16 q0, q0, r0
; CHECK-NEXT:    bx lr
  %add = add <8 x i16> %x, %y
  %round = add nsw <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %half = ashr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <8 x i16> @vrhaddu_v8i16_nwrnd(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vrhaddu_v8i16_nwrnd:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vhadd.u16 q0, q0, r0
; CHECK-NEXT:    bx lr
  %add = add <8 x i16> %x, %y
  %round = add nuw <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %half = lshr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <4 x i32> @vrhadds_v4i32_nwrnd(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vrhadds_v4i32_nwrnd:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vhadd.s32 q0, q0, r0
; CHECK-NEXT:    bx lr
  %add = add <4 x i32> %x, %y
  %round = add nsw <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  %half = ashr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
define arm_aapcs_vfpcc <4 x i32> @vrhaddu_v4i32_nwrnd(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vrhaddu_v4i32_nwrnd:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    movs r0, #1
; CHECK-NEXT:    vhadd.u32 q0, q0, r0
; CHECK-NEXT:    bx lr
  %add = add <4 x i32> %x, %y
  %round = add nuw <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  %half = lshr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
define arm_aapcs_vfpcc <16 x i8> @vrhadds_v16i8_both_nw(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vrhadds_v16i8_both_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vrhadd.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add nsw <16 x i8> %x, %y
  %round = add nsw <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %half = ashr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <16 x i8> @vrhaddu_v16i8_both_nw(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: vrhaddu_v16i8_both_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vrhadd.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add nuw <16 x i8> %x, %y
  %round = add nuw <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %half = lshr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %half
}
define arm_aapcs_vfpcc <8 x i16> @vrhadds_v8i16_both_nw(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vrhadds_v8i16_both_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vrhadd.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add nsw <8 x i16> %x, %y
  %round = add nsw <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %half = ashr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <8 x i16> @vrhaddu_v8i16_both_nw(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: vrhaddu_v8i16_both_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vrhadd.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add nuw <8 x i16> %x, %y
  %round = add nuw <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %half = lshr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %half
}
define arm_aapcs_vfpcc <4 x i32> @vrhadds_v4i32_both_nw(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vrhadds_v4i32_both_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vrhadd.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add nsw <4 x i32> %x, %y
  %round = add nsw <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  %half = ashr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}
define arm_aapcs_vfpcc <4 x i32> @vrhaddu_v4i32_both_nw(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: vrhaddu_v4i32_both_nw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vrhadd.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %add = add nuw <4 x i32> %x, %y
  %round = add nuw <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  %half = lshr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %half
}