; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=armv7a-eabi -mattr=+neon -float-abi=hard %s -o - | FileCheck %s

; Code generation tests for the ARM NEON vector-subtract family. Each function
; holds a small IR pattern; the assertions above its body pin the exact
; instruction sequence llc is expected to emit for it.

; ---------------------------------------------------------------------------
; Plain subtraction on 64-bit vectors (operands and result in d registers).
; ---------------------------------------------------------------------------

define <8 x i8> @vsubi8(<8 x i8> %A, <8 x i8> %B) {
; CHECK-LABEL: vsubi8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i8 d0, d0, d1
; CHECK-NEXT:    bx lr
  %tmp3 = sub <8 x i8> %A, %B
  ret <8 x i8> %tmp3
}

define <4 x i16> @vsubi16(<4 x i16> %A, <4 x i16> %B) {
; CHECK-LABEL: vsubi16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i16 d0, d0, d1
; CHECK-NEXT:    bx lr
  %tmp3 = sub <4 x i16> %A, %B
  ret <4 x i16> %tmp3
}

define <2 x i32> @vsubi32(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: vsubi32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i32 d0, d0, d1
; CHECK-NEXT:    bx lr
  %tmp3 = sub <2 x i32> %A, %B
  ret <2 x i32> %tmp3
}

define <1 x i64> @vsubi64(<1 x i64> %A, <1 x i64> %B) {
; CHECK-LABEL: vsubi64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i64 d0, d0, d1
; CHECK-NEXT:    bx lr
  %tmp3 = sub <1 x i64> %A, %B
  ret <1 x i64> %tmp3
}

define <2 x float> @vsubf32(<2 x float> %A, <2 x float> %B) {
; CHECK-LABEL: vsubf32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.f32 d0, d0, d1
; CHECK-NEXT:    bx lr
  %tmp3 = fsub <2 x float> %A, %B
  ret <2 x float> %tmp3
}

; ---------------------------------------------------------------------------
; Plain subtraction on 128-bit vectors (operands and result in q registers).
; ---------------------------------------------------------------------------

define <16 x i8> @vsubQi8(<16 x i8> %A, <16 x i8> %B) {
; CHECK-LABEL: vsubQi8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %tmp3 = sub <16 x i8> %A, %B
  ret <16 x i8> %tmp3
}

define <8 x i16> @vsubQi16(<8 x i16> %A, <8 x i16> %B) {
; CHECK-LABEL: vsubQi16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %tmp3 = sub <8 x i16> %A, %B
  ret <8 x i16> %tmp3
}

define <4 x i32> @vsubQi32(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: vsubQi32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %tmp3 = sub <4 x i32> %A, %B
  ret <4 x i32> %tmp3
}

define <2 x i64> @vsubQi64(<2 x i64> %A, <2 x i64> %B) {
; CHECK-LABEL: vsubQi64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i64 q0, q0, q1
; CHECK-NEXT:    bx lr
  %tmp3 = sub <2 x i64> %A, %B
  ret <2 x i64> %tmp3
}

define <4 x float> @vsubQf32(<4 x float> %A, <4 x float> %B) {
; CHECK-LABEL: vsubQf32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.f32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %tmp3 = fsub <4 x float> %A, %B
  ret <4 x float> %tmp3
}

; ---------------------------------------------------------------------------
; Calls to the llvm.arm.neon.vrsubhn intrinsics; each is expected to select a
; single vrsubhn instruction taking two q registers and producing a d register.
; ---------------------------------------------------------------------------

define <8 x i8> @vrsubhni16(<8 x i16> %A, <8 x i16> %B) {
; CHECK-LABEL: vrsubhni16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vrsubhn.i16 d0, q0, q1
; CHECK-NEXT:    bx lr
  %tmp3 = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %A, <8 x i16> %B)
  ret <8 x i8> %tmp3
}

define <4 x i16> @vrsubhni32(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: vrsubhni32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vrsubhn.i32 d0, q0, q1
; CHECK-NEXT:    bx lr
  %tmp3 = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %A, <4 x i32> %B)
  ret <4 x i16> %tmp3
}

define <2 x i32> @vrsubhni64(<2 x i64> %A, <2 x i64> %B) {
; CHECK-LABEL: vrsubhni64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vrsubhn.i64 d0, q0, q1
; CHECK-NEXT:    bx lr
  %tmp3 = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %A, <2 x i64> %B)
  ret <2 x i32> %tmp3
}

declare <8 x i8>  @llvm.arm.neon.vrsubhn.v8i8(<8 x i16>, <8 x i16>) readnone
declare <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32>, <4 x i32>) readnone
declare <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64>, <2 x i64>) readnone

; ---------------------------------------------------------------------------
; The same narrowing written as generic IR: subtract, logical-shift-right by
; half the element width, truncate. Expected to fold into a single vsubhn.
; ---------------------------------------------------------------------------

define <8 x i8> @vsubhni16_natural(<8 x i16> %A, <8 x i16> %B) {
; CHECK-LABEL: vsubhni16_natural:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubhn.i16 d0, q0, q1
; CHECK-NEXT:    bx lr
  %diff = sub <8 x i16> %A, %B
  %shift = lshr <8 x i16> %diff, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %trunc = trunc <8 x i16> %shift to <8 x i8>
  ret <8 x i8> %trunc
}

define <4 x i16> @vsubhni32_natural(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: vsubhni32_natural:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubhn.i32 d0, q0, q1
; CHECK-NEXT:    bx lr
  %diff = sub <4 x i32> %A, %B
  %shift = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
  %trunc = trunc <4 x i32> %shift to <4 x i16>
  ret <4 x i16> %trunc
}

define <2 x i32> @vsubhni64_natural(<2 x i64> %A, <2 x i64> %B) {
; CHECK-LABEL: vsubhni64_natural:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubhn.i64 d0, q0, q1
; CHECK-NEXT:    bx lr
  %diff = sub <2 x i64> %A, %B
  %shift = lshr <2 x i64> %diff, <i64 32, i64 32>
  %trunc = trunc <2 x i64> %shift to <2 x i32>
  ret <2 x i32> %trunc
}

; ---------------------------------------------------------------------------
; Long subtract: both d-register operands are sign-extended (vsubl.s*) or
; zero-extended (vsubl.u*) to twice the element width before subtracting.
; ---------------------------------------------------------------------------

define <8 x i16> @vsubls8(<8 x i8> %A, <8 x i8> %B) {
; CHECK-LABEL: vsubls8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubl.s8 q0, d0, d1
; CHECK-NEXT:    bx lr
  %tmp3 = sext <8 x i8> %A to <8 x i16>
  %tmp4 = sext <8 x i8> %B to <8 x i16>
  %tmp5 = sub <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

define <4 x i32> @vsubls16(<4 x i16> %A, <4 x i16> %B) {
; CHECK-LABEL: vsubls16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubl.s16 q0, d0, d1
; CHECK-NEXT:    bx lr
  %tmp3 = sext <4 x i16> %A to <4 x i32>
  %tmp4 = sext <4 x i16> %B to <4 x i32>
  %tmp5 = sub <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

define <2 x i64> @vsubls32(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: vsubls32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubl.s32 q0, d0, d1
; CHECK-NEXT:    bx lr
  %tmp3 = sext <2 x i32> %A to <2 x i64>
  %tmp4 = sext <2 x i32> %B to <2 x i64>
  %tmp5 = sub <2 x i64> %tmp3, %tmp4
  ret <2 x i64> %tmp5
}

define <8 x i16> @vsublu8(<8 x i8> %A, <8 x i8> %B) {
; CHECK-LABEL: vsublu8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubl.u8 q0, d0, d1
; CHECK-NEXT:    bx lr
  %tmp3 = zext <8 x i8> %A to <8 x i16>
  %tmp4 = zext <8 x i8> %B to <8 x i16>
  %tmp5 = sub <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

define <4 x i32> @vsublu16(<4 x i16> %A, <4 x i16> %B) {
; CHECK-LABEL: vsublu16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubl.u16 q0, d0, d1
; CHECK-NEXT:    bx lr
  %tmp3 = zext <4 x i16> %A to <4 x i32>
  %tmp4 = zext <4 x i16> %B to <4 x i32>
  %tmp5 = sub <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

define <2 x i64> @vsublu32(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: vsublu32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubl.u32 q0, d0, d1
; CHECK-NEXT:    bx lr
  %tmp3 = zext <2 x i32> %A to <2 x i64>
  %tmp4 = zext <2 x i32> %B to <2 x i64>
  %tmp5 = sub <2 x i64> %tmp3, %tmp4
  ret <2 x i64> %tmp5
}

; Long subtract of zero-extended operands whose result is then masked back to
; the low half of each widened element. The expected output keeps the unsigned
; vsubl and applies the mask as a separate instruction.

define <8 x i16> @vsubla8(<8 x i8> %A, <8 x i8> %B) {
; CHECK-LABEL: vsubla8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubl.u8 q0, d0, d1
; CHECK-NEXT:    vbic.i16 q0, #0xff00
; CHECK-NEXT:    bx lr
  %tmp3 = zext <8 x i8> %A to <8 x i16>
  %tmp4 = zext <8 x i8> %B to <8 x i16>
  %tmp5 = sub <8 x i16> %tmp3, %tmp4
  %and = and <8 x i16> %tmp5, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  ret <8 x i16> %and
}

define <4 x i32> @vsubla16(<4 x i16> %A, <4 x i16> %B) {
; CHECK-LABEL: vsubla16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 q8, #0xffff
; CHECK-NEXT:    vsubl.u16 q9, d0, d1
; CHECK-NEXT:    vand q0, q9, q8
; CHECK-NEXT:    bx lr
  %tmp3 = zext <4 x i16> %A to <4 x i32>
  %tmp4 = zext <4 x i16> %B to <4 x i32>
  %tmp5 = sub <4 x i32> %tmp3, %tmp4
  %and = and <4 x i32> %tmp5, <i32 65535, i32 65535, i32 65535, i32 65535>
  ret <4 x i32> %and
}

define <2 x i64> @vsubla32(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: vsubla32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i64 q8, #0xffffffff
; CHECK-NEXT:    vsubl.u32 q9, d0, d1
; CHECK-NEXT:    vand q0, q9, q8
; CHECK-NEXT:    bx lr
  %tmp3 = zext <2 x i32> %A to <2 x i64>
  %tmp4 = zext <2 x i32> %B to <2 x i64>
  %tmp5 = sub <2 x i64> %tmp3, %tmp4
  %and = and <2 x i64> %tmp5, <i64 4294967295, i64 4294967295>
  ret <2 x i64> %and
}

; ---------------------------------------------------------------------------
; Wide subtract: only the second (d-register) operand is extended, then
; subtracted from an already-wide q-register operand (vsubw.s* / vsubw.u*).
; ---------------------------------------------------------------------------

define <8 x i16> @vsubws8(<8 x i16> %A, <8 x i8> %B) {
; CHECK-LABEL: vsubws8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubw.s8 q0, q0, d2
; CHECK-NEXT:    bx lr
  %tmp3 = sext <8 x i8> %B to <8 x i16>
  %tmp4 = sub <8 x i16> %A, %tmp3
  ret <8 x i16> %tmp4
}

define <4 x i32> @vsubws16(<4 x i32> %A, <4 x i16> %B) {
; CHECK-LABEL: vsubws16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubw.s16 q0, q0, d2
; CHECK-NEXT:    bx lr
  %tmp3 = sext <4 x i16> %B to <4 x i32>
  %tmp4 = sub <4 x i32> %A, %tmp3
  ret <4 x i32> %tmp4
}

define <2 x i64> @vsubws32(<2 x i64> %A, <2 x i32> %B) {
; CHECK-LABEL: vsubws32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubw.s32 q0, q0, d2
; CHECK-NEXT:    bx lr
  %tmp3 = sext <2 x i32> %B to <2 x i64>
  %tmp4 = sub <2 x i64> %A, %tmp3
  ret <2 x i64> %tmp4
}

define <8 x i16> @vsubwu8(<8 x i16> %A, <8 x i8> %B) {
; CHECK-LABEL: vsubwu8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubw.u8 q0, q0, d2
; CHECK-NEXT:    bx lr
  %tmp3 = zext <8 x i8> %B to <8 x i16>
  %tmp4 = sub <8 x i16> %A, %tmp3
  ret <8 x i16> %tmp4
}

define <4 x i32> @vsubwu16(<4 x i32> %A, <4 x i16> %B) {
; CHECK-LABEL: vsubwu16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubw.u16 q0, q0, d2
; CHECK-NEXT:    bx lr
  %tmp3 = zext <4 x i16> %B to <4 x i32>
  %tmp4 = sub <4 x i32> %A, %tmp3
  ret <4 x i32> %tmp4
}

define <2 x i64> @vsubwu32(<2 x i64> %A, <2 x i32> %B) {
; CHECK-LABEL: vsubwu32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubw.u32 q0, q0, d2
; CHECK-NEXT:    bx lr
  %tmp3 = zext <2 x i32> %B to <2 x i64>
  %tmp4 = sub <2 x i64> %A, %tmp3
  ret <2 x i64> %tmp4
}

; Wide subtract of a zero-extended operand whose result is then masked back to
; the low half of each element. The expected output keeps the unsigned vsubw
; and applies the mask as a separate instruction.

define <8 x i16> @vsubwa8(<8 x i16> %A, <8 x i8> %B) {
; CHECK-LABEL: vsubwa8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubw.u8 q0, q0, d2
; CHECK-NEXT:    vbic.i16 q0, #0xff00
; CHECK-NEXT:    bx lr
  %tmp3 = zext <8 x i8> %B to <8 x i16>
  %tmp4 = sub <8 x i16> %A, %tmp3
  %and = and <8 x i16> %tmp4, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  ret <8 x i16> %and
}

define <4 x i32> @vsubwa16(<4 x i32> %A, <4 x i16> %B) {
; CHECK-LABEL: vsubwa16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 q8, #0xffff
; CHECK-NEXT:    vsubw.u16 q9, q0, d2
; CHECK-NEXT:    vand q0, q9, q8
; CHECK-NEXT:    bx lr
  %tmp3 = zext <4 x i16> %B to <4 x i32>
  %tmp4 = sub <4 x i32> %A, %tmp3
  %and = and <4 x i32> %tmp4, <i32 65535, i32 65535, i32 65535, i32 65535>
  ret <4 x i32> %and
}

define <2 x i64> @vsubwa32(<2 x i64> %A, <2 x i32> %B) {
; CHECK-LABEL: vsubwa32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i64 q8, #0xffffffff
; CHECK-NEXT:    vsubw.u32 q9, q0, d2
; CHECK-NEXT:    vand q0, q9, q8
; CHECK-NEXT:    bx lr
  %tmp3 = zext <2 x i32> %B to <2 x i64>
  %tmp4 = sub <2 x i64> %A, %tmp3
  %and = and <2 x i64> %tmp4, <i64 4294967295, i64 4294967295>
  ret <2 x i64> %and
}