; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple armeb-eabi -mattr v7,neon -float-abi soft %s -o - | FileCheck %s -check-prefix SOFT
; RUN: llc -mtriple armeb-eabi -mattr v7,neon -float-abi hard %s -o - | FileCheck %s -check-prefix HARD

; Test bitcasts among 64-bit types (i64, double, and 64-bit vectors) and among
; 128-bit types (fp128 and 128-bit vectors) on big-endian ARM, checking the
; VREV lane reversals in the expected code under both the soft-float and
; hard-float ABIs.

define i64 @test_i64_f64(double %p) {
; SOFT-LABEL: test_i64_f64:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vadd.f64 d16, d16, d16
; SOFT-NEXT: vmov r0, r2, d16
; SOFT-NEXT: adds r1, r0, r0
; SOFT-NEXT: adc r0, r2, r2
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_i64_f64:
; HARD: @ %bb.0:
; HARD-NEXT: vadd.f64 d16, d0, d0
; HARD-NEXT: vmov r0, r2, d16
; HARD-NEXT: adds r1, r0, r0
; HARD-NEXT: adc r0, r2, r2
; HARD-NEXT: bx lr
  %1 = fadd double %p, %p
  %2 = bitcast double %1 to i64
  %3 = add i64 %2, %2
  ret i64 %3
}

define i64 @test_i64_v1i64(<1 x i64> %p) {
; SOFT-LABEL: test_i64_v1i64:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vadd.i64 d16, d16, d16
; SOFT-NEXT: vmov r0, r2, d16
; SOFT-NEXT: adds r1, r0, r0
; SOFT-NEXT: adc r0, r2, r2
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_i64_v1i64:
; HARD: @ %bb.0:
; HARD-NEXT: vadd.i64 d16, d0, d0
; HARD-NEXT: vmov r0, r2, d16
; HARD-NEXT: adds r1, r0, r0
; HARD-NEXT: adc r0, r2, r2
; HARD-NEXT: bx lr
  %1 = add <1 x i64> %p, %p
  %2 = bitcast <1 x i64> %1 to i64
  %3 = add i64 %2, %2
  ret i64 %3
}

define i64 @test_i64_v2f32(<2 x float> %p) {
; SOFT-LABEL: test_i64_v2f32:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vadd.f32 d16, d16, d16
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vmov r0, r2, d16
; SOFT-NEXT: adds r1, r0, r0
; SOFT-NEXT: adc r0, r2, r2
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_i64_v2f32:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.32 d16, d0
; HARD-NEXT: vadd.f32 d16, d16, d16
; HARD-NEXT: vrev64.32 d16, d16
; HARD-NEXT: vmov r0, r2, d16
; HARD-NEXT: adds r1, r0, r0
; HARD-NEXT: adc r0, r2, r2
; HARD-NEXT: bx lr
  %1 = fadd <2 x float> %p, %p
  %2 = bitcast <2 x float> %1 to i64
  %3 = add i64 %2, %2
  ret i64 %3
}

define i64 @test_i64_v2i32(<2 x i32> %p) {
; SOFT-LABEL: test_i64_v2i32:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vadd.i32 d16, d16, d16
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vmov r0, r2, d16
; SOFT-NEXT: adds r1, r0, r0
; SOFT-NEXT: adc r0, r2, r2
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_i64_v2i32:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.32 d16, d0
; HARD-NEXT: vadd.i32 d16, d16, d16
; HARD-NEXT: vrev64.32 d16, d16
; HARD-NEXT: vmov r0, r2, d16
; HARD-NEXT: adds r1, r0, r0
; HARD-NEXT: adc r0, r2, r2
; HARD-NEXT: bx lr
  %1 = add <2 x i32> %p, %p
  %2 = bitcast <2 x i32> %1 to i64
  %3 = add i64 %2, %2
  ret i64 %3
}

define i64 @test_i64_v4i16(<4 x i16> %p) {
; SOFT-LABEL: test_i64_v4i16:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.16 d16, d16
; SOFT-NEXT: vadd.i16 d16, d16, d16
; SOFT-NEXT: vrev64.16 d16, d16
; SOFT-NEXT: vmov r0, r2, d16
; SOFT-NEXT: adds r1, r0, r0
; SOFT-NEXT: adc r0, r2, r2
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_i64_v4i16:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.16 d16, d0
; HARD-NEXT: vadd.i16 d16, d16, d16
; HARD-NEXT: vrev64.16 d16, d16
; HARD-NEXT: vmov r0, r2, d16
; HARD-NEXT: adds r1, r0, r0
; HARD-NEXT: adc r0, r2, r2
; HARD-NEXT: bx lr
  %1 = add <4 x i16> %p, %p
  %2 = bitcast <4 x i16> %1 to i64
  %3 = add i64 %2, %2
  ret i64 %3
}

define i64 @test_i64_v8i8(<8 x i8> %p) {
; SOFT-LABEL: test_i64_v8i8:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.8 d16, d16
; SOFT-NEXT: vadd.i8 d16, d16, d16
; SOFT-NEXT: vrev64.8 d16, d16
; SOFT-NEXT: vmov r0, r2, d16
; SOFT-NEXT: adds r1, r0, r0
; SOFT-NEXT: adc r0, r2, r2
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_i64_v8i8:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.8 d16, d0
; HARD-NEXT: vadd.i8 d16, d16, d16
; HARD-NEXT: vrev64.8 d16, d16
; HARD-NEXT: vmov r0, r2, d16
; HARD-NEXT: adds r1, r0, r0
; HARD-NEXT: adc r0, r2, r2
; HARD-NEXT: bx lr
  %1 = add <8 x i8> %p, %p
  %2 = bitcast <8 x i8> %1 to i64
  %3 = add i64 %2, %2
  ret i64 %3
}

define double @test_f64_i64(i64 %p) {
; SOFT-LABEL: test_f64_i64:
; SOFT: @ %bb.0:
; SOFT-NEXT: adds r1, r1, r1
; SOFT-NEXT: adc r0, r0, r0
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vadd.f64 d16, d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_f64_i64:
; HARD: @ %bb.0:
; HARD-NEXT: adds r1, r1, r1
; HARD-NEXT: adc r0, r0, r0
; HARD-NEXT: vmov d16, r1, r0
; HARD-NEXT: vadd.f64 d0, d16, d16
; HARD-NEXT: bx lr
  %1 = add i64 %p, %p
  %2 = bitcast i64 %1 to double
  %3 = fadd double %2, %2
  ret double %3
}

define double @test_f64_v1i64(<1 x i64> %p) {
; SOFT-LABEL: test_f64_v1i64:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vadd.i64 d16, d16, d16
; SOFT-NEXT: vadd.f64 d16, d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_f64_v1i64:
; HARD: @ %bb.0:
; HARD-NEXT: vadd.i64 d16, d0, d0
; HARD-NEXT: vadd.f64 d0, d16, d16
; HARD-NEXT: bx lr
  %1 = add <1 x i64> %p, %p
  %2 = bitcast <1 x i64> %1 to double
  %3 = fadd double %2, %2
  ret double %3
}

define double @test_f64_v2f32(<2 x float> %p) {
; SOFT-LABEL: test_f64_v2f32:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vadd.f32 d16, d16, d16
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vadd.f64 d16, d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_f64_v2f32:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.32 d16, d0
; HARD-NEXT: vadd.f32 d16, d16, d16
; HARD-NEXT: vrev64.32 d16, d16
; HARD-NEXT: vadd.f64 d0, d16, d16
; HARD-NEXT: bx lr
  %1 = fadd <2 x float> %p, %p
  %2 = bitcast <2 x float> %1 to double
  %3 = fadd double %2, %2
  ret double %3
}

define double @test_f64_v2i32(<2 x i32> %p) {
; SOFT-LABEL: test_f64_v2i32:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vadd.i32 d16, d16, d16
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vadd.f64 d16, d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_f64_v2i32:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.32 d16, d0
; HARD-NEXT: vadd.i32 d16, d16, d16
; HARD-NEXT: vrev64.32 d16, d16
; HARD-NEXT: vadd.f64 d0, d16, d16
; HARD-NEXT: bx lr
  %1 = add <2 x i32> %p, %p
  %2 = bitcast <2 x i32> %1 to double
  %3 = fadd double %2, %2
  ret double %3
}

define double @test_f64_v4i16(<4 x i16> %p) {
; SOFT-LABEL: test_f64_v4i16:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.16 d16, d16
; SOFT-NEXT: vadd.i16 d16, d16, d16
; SOFT-NEXT: vrev64.16 d16, d16
; SOFT-NEXT: vadd.f64 d16, d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_f64_v4i16:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.16 d16, d0
; HARD-NEXT: vadd.i16 d16, d16, d16
; HARD-NEXT: vrev64.16 d16, d16
; HARD-NEXT: vadd.f64 d0, d16, d16
; HARD-NEXT: bx lr
  %1 = add <4 x i16> %p, %p
  %2 = bitcast <4 x i16> %1 to double
  %3 = fadd double %2, %2
  ret double %3
}

define double @test_f64_v8i8(<8 x i8> %p) {
; SOFT-LABEL: test_f64_v8i8:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.8 d16, d16
; SOFT-NEXT: vadd.i8 d16, d16, d16
; SOFT-NEXT: vrev64.8 d16, d16
; SOFT-NEXT: vadd.f64 d16, d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_f64_v8i8:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.8 d16, d0
; HARD-NEXT: vadd.i8 d16, d16, d16
; HARD-NEXT: vrev64.8 d16, d16
; HARD-NEXT: vadd.f64 d0, d16, d16
; HARD-NEXT: bx lr
  %1 = add <8 x i8> %p, %p
  %2 = bitcast <8 x i8> %1 to double
  %3 = fadd double %2, %2
  ret double %3
}

define <1 x i64> @test_v1i64_i64(i64 %p) {
; SOFT-LABEL: test_v1i64_i64:
; SOFT: @ %bb.0:
; SOFT-NEXT: adds r1, r1, r1
; SOFT-NEXT: adc r0, r0, r0
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vadd.i64 d16, d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v1i64_i64:
; HARD: @ %bb.0:
; HARD-NEXT: adds r1, r1, r1
; HARD-NEXT: adc r0, r0, r0
; HARD-NEXT: vmov d16, r1, r0
; HARD-NEXT: vadd.i64 d0, d16, d16
; HARD-NEXT: bx lr
  %1 = add i64 %p, %p
  %2 = bitcast i64 %1 to <1 x i64>
  %3 = add <1 x i64> %2, %2
  ret <1 x i64> %3
}

define <1 x i64> @test_v1i64_f64(double %p) {
; SOFT-LABEL: test_v1i64_f64:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vadd.f64 d16, d16, d16
; SOFT-NEXT: vadd.i64 d16, d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v1i64_f64:
; HARD: @ %bb.0:
; HARD-NEXT: vadd.f64 d16, d0, d0
; HARD-NEXT: vadd.i64 d0, d16, d16
; HARD-NEXT: bx lr
  %1 = fadd double %p, %p
  %2 = bitcast double %1 to <1 x i64>
  %3 = add <1 x i64> %2, %2
  ret <1 x i64> %3
}

define <1 x i64> @test_v1i64_v2f32(<2 x float> %p) {
; SOFT-LABEL: test_v1i64_v2f32:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vadd.f32 d16, d16, d16
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vadd.i64 d16, d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v1i64_v2f32:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.32 d16, d0
; HARD-NEXT: vadd.f32 d16, d16, d16
; HARD-NEXT: vrev64.32 d16, d16
; HARD-NEXT: vadd.i64 d0, d16, d16
; HARD-NEXT: bx lr
  %1 = fadd <2 x float> %p, %p
  %2 = bitcast <2 x float> %1 to <1 x i64>
  %3 = add <1 x i64> %2, %2
  ret <1 x i64> %3
}

define <1 x i64> @test_v1i64_v2i32(<2 x i32> %p) {
; SOFT-LABEL: test_v1i64_v2i32:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vadd.i32 d16, d16, d16
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vadd.i64 d16, d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v1i64_v2i32:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.32 d16, d0
; HARD-NEXT: vadd.i32 d16, d16, d16
; HARD-NEXT: vrev64.32 d16, d16
; HARD-NEXT: vadd.i64 d0, d16, d16
; HARD-NEXT: bx lr
  %1 = add <2 x i32> %p, %p
  %2 = bitcast <2 x i32> %1 to <1 x i64>
  %3 = add <1 x i64> %2, %2
  ret <1 x i64> %3
}

define <1 x i64> @test_v1i64_v4i16(<4 x i16> %p) {
; SOFT-LABEL: test_v1i64_v4i16:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.16 d16, d16
; SOFT-NEXT: vadd.i16 d16, d16, d16
; SOFT-NEXT: vrev64.16 d16, d16
; SOFT-NEXT: vadd.i64 d16, d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v1i64_v4i16:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.16 d16, d0
; HARD-NEXT: vadd.i16 d16, d16, d16
; HARD-NEXT: vrev64.16 d16, d16
; HARD-NEXT: vadd.i64 d0, d16, d16
; HARD-NEXT: bx lr
  %1 = add <4 x i16> %p, %p
  %2 = bitcast <4 x i16> %1 to <1 x i64>
  %3 = add <1 x i64> %2, %2
  ret <1 x i64> %3
}

define <1 x i64> @test_v1i64_v8i8(<8 x i8> %p) {
; SOFT-LABEL: test_v1i64_v8i8:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.8 d16, d16
; SOFT-NEXT: vadd.i8 d16, d16, d16
; SOFT-NEXT: vrev64.8 d16, d16
; SOFT-NEXT: vadd.i64 d16, d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v1i64_v8i8:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.8 d16, d0
; HARD-NEXT: vadd.i8 d16, d16, d16
; HARD-NEXT: vrev64.8 d16, d16
; HARD-NEXT: vadd.i64 d0, d16, d16
; HARD-NEXT: bx lr
  %1 = add <8 x i8> %p, %p
  %2 = bitcast <8 x i8> %1 to <1 x i64>
  %3 = add <1 x i64> %2, %2
  ret <1 x i64> %3
}

define <2 x float> @test_v2f32_i64(i64 %p) {
; SOFT-LABEL: test_v2f32_i64:
; SOFT: @ %bb.0:
; SOFT-NEXT: adds r1, r1, r1
; SOFT-NEXT: adc r0, r0, r0
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vadd.f32 d16, d16, d16
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v2f32_i64:
; HARD: @ %bb.0:
; HARD-NEXT: adds r1, r1, r1
; HARD-NEXT: adc r0, r0, r0
; HARD-NEXT: vmov d16, r1, r0
; HARD-NEXT: vrev64.32 d16, d16
; HARD-NEXT: vadd.f32 d16, d16, d16
; HARD-NEXT: vrev64.32 d0, d16
; HARD-NEXT: bx lr
  %1 = add i64 %p, %p
  %2 = bitcast i64 %1 to <2 x float>
  %3 = fadd <2 x float> %2, %2
  ret <2 x float> %3
}

define <2 x float> @test_v2f32_f64(double %p) {
; SOFT-LABEL: test_v2f32_f64:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vadd.f64 d16, d16, d16
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vadd.f32 d16, d16, d16
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v2f32_f64:
; HARD: @ %bb.0:
; HARD-NEXT: vadd.f64 d16, d0, d0
; HARD-NEXT: vrev64.32 d16, d16
; HARD-NEXT: vadd.f32 d16, d16, d16
; HARD-NEXT: vrev64.32 d0, d16
; HARD-NEXT: bx lr
  %1 = fadd double %p, %p
  %2 = bitcast double %1 to <2 x float>
  %3 = fadd <2 x float> %2, %2
  ret <2 x float> %3
}

define <2 x float> @test_v2f32_v1i64(<1 x i64> %p) {
; SOFT-LABEL: test_v2f32_v1i64:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vadd.i64 d16, d16, d16
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vadd.f32 d16, d16, d16
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v2f32_v1i64:
; HARD: @ %bb.0:
; HARD-NEXT: vadd.i64 d16, d0, d0
; HARD-NEXT: vrev64.32 d16, d16
; HARD-NEXT: vadd.f32 d16, d16, d16
; HARD-NEXT: vrev64.32 d0, d16
; HARD-NEXT: bx lr
  %1 = add <1 x i64> %p, %p
  %2 = bitcast <1 x i64> %1 to <2 x float>
  %3 = fadd <2 x float> %2, %2
  ret <2 x float> %3
}

define <2 x float> @test_v2f32_v2i32(<2 x i32> %p) {
; SOFT-LABEL: test_v2f32_v2i32:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vadd.i32 d16, d16, d16
; SOFT-NEXT: vadd.f32 d16, d16, d16
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v2f32_v2i32:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.32 d16, d0
; HARD-NEXT: vadd.i32 d16, d16, d16
; HARD-NEXT: vadd.f32 d16, d16, d16
; HARD-NEXT: vrev64.32 d0, d16
; HARD-NEXT: bx lr
  %1 = add <2 x i32> %p, %p
  %2 = bitcast <2 x i32> %1 to <2 x float>
  %3 = fadd <2 x float> %2, %2
  ret <2 x float> %3
}

define <2 x float> @test_v2f32_v4i16(<4 x i16> %p) {
; SOFT-LABEL: test_v2f32_v4i16:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.16 d16, d16
; SOFT-NEXT: vadd.i16 d16, d16, d16
; SOFT-NEXT: vrev32.16 d16, d16
; SOFT-NEXT: vadd.f32 d16, d16, d16
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v2f32_v4i16:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.16 d16, d0
; HARD-NEXT: vadd.i16 d16, d16, d16
; HARD-NEXT: vrev32.16 d16, d16
; HARD-NEXT: vadd.f32 d16, d16, d16
; HARD-NEXT: vrev64.32 d0, d16
; HARD-NEXT: bx lr
  %1 = add <4 x i16> %p, %p
  %2 = bitcast <4 x i16> %1 to <2 x float>
  %3 = fadd <2 x float> %2, %2
  ret <2 x float> %3
}

define <2 x float> @test_v2f32_v8i8(<8 x i8> %p) {
; SOFT-LABEL: test_v2f32_v8i8:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.8 d16, d16
; SOFT-NEXT: vadd.i8 d16, d16, d16
; SOFT-NEXT: vrev32.8 d16, d16
; SOFT-NEXT: vadd.f32 d16, d16, d16
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v2f32_v8i8:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.8 d16, d0
; HARD-NEXT: vadd.i8 d16, d16, d16
; HARD-NEXT: vrev32.8 d16, d16
; HARD-NEXT: vadd.f32 d16, d16, d16
; HARD-NEXT: vrev64.32 d0, d16
; HARD-NEXT: bx lr
  %1 = add <8 x i8> %p, %p
  %2 = bitcast <8 x i8> %1 to <2 x float>
  %3 = fadd <2 x float> %2, %2
  ret <2 x float> %3
}

define <2 x i32> @test_v2i32_i64(i64 %p) {
; SOFT-LABEL: test_v2i32_i64:
; SOFT: @ %bb.0:
; SOFT-NEXT: adds r1, r1, r1
; SOFT-NEXT: adc r0, r0, r0
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vadd.i32 d16, d16, d16
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v2i32_i64:
; HARD: @ %bb.0:
; HARD-NEXT: adds r1, r1, r1
; HARD-NEXT: adc r0, r0, r0
; HARD-NEXT: vmov d16, r1, r0
; HARD-NEXT: vrev64.32 d16, d16
; HARD-NEXT: vadd.i32 d16, d16, d16
; HARD-NEXT: vrev64.32 d0, d16
; HARD-NEXT: bx lr
  %1 = add i64 %p, %p
  %2 = bitcast i64 %1 to <2 x i32>
  %3 = add <2 x i32> %2, %2
  ret <2 x i32> %3
}

define <2 x i32> @test_v2i32_f64(double %p) {
; SOFT-LABEL: test_v2i32_f64:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vadd.f64 d16, d16, d16
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vadd.i32 d16, d16, d16
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v2i32_f64:
; HARD: @ %bb.0:
; HARD-NEXT: vadd.f64 d16, d0, d0
; HARD-NEXT: vrev64.32 d16, d16
; HARD-NEXT: vadd.i32 d16, d16, d16
; HARD-NEXT: vrev64.32 d0, d16
; HARD-NEXT: bx lr
  %1 = fadd double %p, %p
  %2 = bitcast double %1 to <2 x i32>
  %3 = add <2 x i32> %2, %2
  ret <2 x i32> %3
}

define <2 x i32> @test_v2i32_v1i64(<1 x i64> %p) {
; SOFT-LABEL: test_v2i32_v1i64:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vadd.i64 d16, d16, d16
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vadd.i32 d16, d16, d16
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v2i32_v1i64:
; HARD: @ %bb.0:
; HARD-NEXT: vadd.i64 d16, d0, d0
; HARD-NEXT: vrev64.32 d16, d16
; HARD-NEXT: vadd.i32 d16, d16, d16
; HARD-NEXT: vrev64.32 d0, d16
; HARD-NEXT: bx lr
  %1 = add <1 x i64> %p, %p
  %2 = bitcast <1 x i64> %1 to <2 x i32>
  %3 = add <2 x i32> %2, %2
  ret <2 x i32> %3
}

define <2 x i32> @test_v2i32_v2f32(<2 x float> %p) {
; SOFT-LABEL: test_v2i32_v2f32:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vadd.f32 d16, d16, d16
; SOFT-NEXT: vadd.i32 d16, d16, d16
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v2i32_v2f32:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.32 d16, d0
; HARD-NEXT: vadd.f32 d16, d16, d16
; HARD-NEXT: vadd.i32 d16, d16, d16
; HARD-NEXT: vrev64.32 d0, d16
; HARD-NEXT: bx lr
  %1 = fadd <2 x float> %p, %p
  %2 = bitcast <2 x float> %1 to <2 x i32>
  %3 = add <2 x i32> %2, %2
  ret <2 x i32> %3
}

define <2 x i32> @test_v2i32_v4i16(<4 x i16> %p) {
; SOFT-LABEL: test_v2i32_v4i16:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.16 d16, d16
; SOFT-NEXT: vadd.i16 d16, d16, d16
; SOFT-NEXT: vrev32.16 d16, d16
; SOFT-NEXT: vadd.i32 d16, d16, d16
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v2i32_v4i16:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.16 d16, d0
; HARD-NEXT: vadd.i16 d16, d16, d16
; HARD-NEXT: vrev32.16 d16, d16
; HARD-NEXT: vadd.i32 d16, d16, d16
; HARD-NEXT: vrev64.32 d0, d16
; HARD-NEXT: bx lr
  %1 = add <4 x i16> %p, %p
  %2 = bitcast <4 x i16> %1 to <2 x i32>
  %3 = add <2 x i32> %2, %2
  ret <2 x i32> %3
}

define <2 x i32> @test_v2i32_v8i8(<8 x i8> %p) {
; SOFT-LABEL: test_v2i32_v8i8:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.8 d16, d16
; SOFT-NEXT: vadd.i8 d16, d16, d16
; SOFT-NEXT: vrev32.8 d16, d16
; SOFT-NEXT: vadd.i32 d16, d16, d16
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v2i32_v8i8:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.8 d16, d0
; HARD-NEXT: vadd.i8 d16, d16, d16
; HARD-NEXT: vrev32.8 d16, d16
; HARD-NEXT: vadd.i32 d16, d16, d16
; HARD-NEXT: vrev64.32 d0, d16
; HARD-NEXT: bx lr
  %1 = add <8 x i8> %p, %p
  %2 = bitcast <8 x i8> %1 to <2 x i32>
  %3 = add <2 x i32> %2, %2
  ret <2 x i32> %3
}

define <4 x i16> @test_v4i16_i64(i64 %p) {
; SOFT-LABEL: test_v4i16_i64:
; SOFT: @ %bb.0:
; SOFT-NEXT: adds r1, r1, r1
; SOFT-NEXT: adc r0, r0, r0
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.16 d16, d16
; SOFT-NEXT: vadd.i16 d16, d16, d16
; SOFT-NEXT: vrev64.16 d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v4i16_i64:
; HARD: @ %bb.0:
; HARD-NEXT: adds r1, r1, r1
; HARD-NEXT: adc r0, r0, r0
; HARD-NEXT: vmov d16, r1, r0
; HARD-NEXT: vrev64.16 d16, d16
; HARD-NEXT: vadd.i16 d16, d16, d16
; HARD-NEXT: vrev64.16 d0, d16
; HARD-NEXT: bx lr
  %1 = add i64 %p, %p
  %2 = bitcast i64 %1 to <4 x i16>
  %3 = add <4 x i16> %2, %2
  ret <4 x i16> %3
}

define <4 x i16> @test_v4i16_f64(double %p) {
; SOFT-LABEL: test_v4i16_f64:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vadd.f64 d16, d16, d16
; SOFT-NEXT: vrev64.16 d16, d16
; SOFT-NEXT: vadd.i16 d16, d16, d16
; SOFT-NEXT: vrev64.16 d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v4i16_f64:
; HARD: @ %bb.0:
; HARD-NEXT: vadd.f64 d16, d0, d0
; HARD-NEXT: vrev64.16 d16, d16
; HARD-NEXT: vadd.i16 d16, d16, d16
; HARD-NEXT: vrev64.16 d0, d16
; HARD-NEXT: bx lr
  %1 = fadd double %p, %p
  %2 = bitcast double %1 to <4 x i16>
  %3 = add <4 x i16> %2, %2
  ret <4 x i16> %3
}

define <4 x i16> @test_v4i16_v1i64(<1 x i64> %p) {
; SOFT-LABEL: test_v4i16_v1i64:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vadd.i64 d16, d16, d16
; SOFT-NEXT: vrev64.16 d16, d16
; SOFT-NEXT: vadd.i16 d16, d16, d16
; SOFT-NEXT: vrev64.16 d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v4i16_v1i64:
; HARD: @ %bb.0:
; HARD-NEXT: vadd.i64 d16, d0, d0
; HARD-NEXT: vrev64.16 d16, d16
; HARD-NEXT: vadd.i16 d16, d16, d16
; HARD-NEXT: vrev64.16 d0, d16
; HARD-NEXT: bx lr
  %1 = add <1 x i64> %p, %p
  %2 = bitcast <1 x i64> %1 to <4 x i16>
  %3 = add <4 x i16> %2, %2
  ret <4 x i16> %3
}

define <4 x i16> @test_v4i16_v2f32(<2 x float> %p) {
; SOFT-LABEL: test_v4i16_v2f32:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vadd.f32 d16, d16, d16
; SOFT-NEXT: vrev32.16 d16, d16
; SOFT-NEXT: vadd.i16 d16, d16, d16
; SOFT-NEXT: vrev64.16 d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v4i16_v2f32:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.32 d16, d0
; HARD-NEXT: vadd.f32 d16, d16, d16
; HARD-NEXT: vrev32.16 d16, d16
; HARD-NEXT: vadd.i16 d16, d16, d16
; HARD-NEXT: vrev64.16 d0, d16
; HARD-NEXT: bx lr
  %1 = fadd <2 x float> %p, %p
  %2 = bitcast <2 x float> %1 to <4 x i16>
  %3 = add <4 x i16> %2, %2
  ret <4 x i16> %3
}

define <4 x i16> @test_v4i16_v2i32(<2 x i32> %p) {
; SOFT-LABEL: test_v4i16_v2i32:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vadd.i32 d16, d16, d16
; SOFT-NEXT: vrev32.16 d16, d16
; SOFT-NEXT: vadd.i16 d16, d16, d16
; SOFT-NEXT: vrev64.16 d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v4i16_v2i32:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.32 d16, d0
; HARD-NEXT: vadd.i32 d16, d16, d16
; HARD-NEXT: vrev32.16 d16, d16
; HARD-NEXT: vadd.i16 d16, d16, d16
; HARD-NEXT: vrev64.16 d0, d16
; HARD-NEXT: bx lr
  %1 = add <2 x i32> %p, %p
  %2 = bitcast <2 x i32> %1 to <4 x i16>
  %3 = add <4 x i16> %2, %2
  ret <4 x i16> %3
}

define <4 x i16> @test_v4i16_v8i8(<8 x i8> %p) {
; SOFT-LABEL: test_v4i16_v8i8:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.8 d16, d16
; SOFT-NEXT: vadd.i8 d16, d16, d16
; SOFT-NEXT: vrev16.8 d16, d16
; SOFT-NEXT: vadd.i16 d16, d16, d16
; SOFT-NEXT: vrev64.16 d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v4i16_v8i8:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.8 d16, d0
; HARD-NEXT: vadd.i8 d16, d16, d16
; HARD-NEXT: vrev16.8 d16, d16
; HARD-NEXT: vadd.i16 d16, d16, d16
; HARD-NEXT: vrev64.16 d0, d16
; HARD-NEXT: bx lr
  %1 = add <8 x i8> %p, %p
  %2 = bitcast <8 x i8> %1 to <4 x i16>
  %3 = add <4 x i16> %2, %2
  ret <4 x i16> %3
}

define <8 x i8> @test_v8i8_i64(i64 %p) {
; SOFT-LABEL: test_v8i8_i64:
; SOFT: @ %bb.0:
; SOFT-NEXT: adds r1, r1, r1
; SOFT-NEXT: adc r0, r0, r0
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.8 d16, d16
; SOFT-NEXT: vadd.i8 d16, d16, d16
; SOFT-NEXT: vrev64.8 d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v8i8_i64:
; HARD: @ %bb.0:
; HARD-NEXT: adds r1, r1, r1
; HARD-NEXT: adc r0, r0, r0
; HARD-NEXT: vmov d16, r1, r0
; HARD-NEXT: vrev64.8 d16, d16
; HARD-NEXT: vadd.i8 d16, d16, d16
; HARD-NEXT: vrev64.8 d0, d16
; HARD-NEXT: bx lr
  %1 = add i64 %p, %p
  %2 = bitcast i64 %1 to <8 x i8>
  %3 = add <8 x i8> %2, %2
  ret <8 x i8> %3
}

define <8 x i8> @test_v8i8_f64(double %p) {
; SOFT-LABEL: test_v8i8_f64:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vadd.f64 d16, d16, d16
; SOFT-NEXT: vrev64.8 d16, d16
; SOFT-NEXT: vadd.i8 d16, d16, d16
; SOFT-NEXT: vrev64.8 d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v8i8_f64:
; HARD: @ %bb.0:
; HARD-NEXT: vadd.f64 d16, d0, d0
; HARD-NEXT: vrev64.8 d16, d16
; HARD-NEXT: vadd.i8 d16, d16, d16
; HARD-NEXT: vrev64.8 d0, d16
; HARD-NEXT: bx lr
  %1 = fadd double %p, %p
  %2 = bitcast double %1 to <8 x i8>
  %3 = add <8 x i8> %2, %2
  ret <8 x i8> %3
}

define <8 x i8> @test_v8i8_v1i64(<1 x i64> %p) {
; SOFT-LABEL: test_v8i8_v1i64:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vadd.i64 d16, d16, d16
; SOFT-NEXT: vrev64.8 d16, d16
; SOFT-NEXT: vadd.i8 d16, d16, d16
; SOFT-NEXT: vrev64.8 d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v8i8_v1i64:
; HARD: @ %bb.0:
; HARD-NEXT: vadd.i64 d16, d0, d0
; HARD-NEXT: vrev64.8 d16, d16
; HARD-NEXT: vadd.i8 d16, d16, d16
; HARD-NEXT: vrev64.8 d0, d16
; HARD-NEXT: bx lr
  %1 = add <1 x i64> %p, %p
  %2 = bitcast <1 x i64> %1 to <8 x i8>
  %3 = add <8 x i8> %2, %2
  ret <8 x i8> %3
}

define <8 x i8> @test_v8i8_v2f32(<2 x float> %p) {
; SOFT-LABEL: test_v8i8_v2f32:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vadd.f32 d16, d16, d16
; SOFT-NEXT: vrev32.8 d16, d16
; SOFT-NEXT: vadd.i8 d16, d16, d16
; SOFT-NEXT: vrev64.8 d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v8i8_v2f32:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.32 d16, d0
; HARD-NEXT: vadd.f32 d16, d16, d16
; HARD-NEXT: vrev32.8 d16, d16
; HARD-NEXT: vadd.i8 d16, d16, d16
; HARD-NEXT: vrev64.8 d0, d16
; HARD-NEXT: bx lr
  %1 = fadd <2 x float> %p, %p
  %2 = bitcast <2 x float> %1 to <8 x i8>
  %3 = add <8 x i8> %2, %2
  ret <8 x i8> %3
}

define <8 x i8> @test_v8i8_v2i32(<2 x i32> %p) {
; SOFT-LABEL: test_v8i8_v2i32:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.32 d16, d16
; SOFT-NEXT: vadd.i32 d16, d16, d16
; SOFT-NEXT: vrev32.8 d16, d16
; SOFT-NEXT: vadd.i8 d16, d16, d16
; SOFT-NEXT: vrev64.8 d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v8i8_v2i32:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.32 d16, d0
; HARD-NEXT: vadd.i32 d16, d16, d16
; HARD-NEXT: vrev32.8 d16, d16
; HARD-NEXT: vadd.i8 d16, d16, d16
; HARD-NEXT: vrev64.8 d0, d16
; HARD-NEXT: bx lr
  %1 = add <2 x i32> %p, %p
  %2 = bitcast <2 x i32> %1 to <8 x i8>
  %3 = add <8 x i8> %2, %2
  ret <8 x i8> %3
}

define <8 x i8> @test_v8i8_v4i16(<4 x i16> %p) {
; SOFT-LABEL: test_v8i8_v4i16:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.16 d16, d16
; SOFT-NEXT: vadd.i16 d16, d16, d16
; SOFT-NEXT: vrev16.8 d16, d16
; SOFT-NEXT: vadd.i8 d16, d16, d16
; SOFT-NEXT: vrev64.8 d16, d16
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v8i8_v4i16:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.16 d16, d0
; HARD-NEXT: vadd.i16 d16, d16, d16
; HARD-NEXT: vrev16.8 d16, d16
; HARD-NEXT: vadd.i8 d16, d16, d16
; HARD-NEXT: vrev64.8 d0, d16
; HARD-NEXT: bx lr
  %1 = add <4 x i16> %p, %p
  %2 = bitcast <4 x i16> %1 to <8 x i8>
  %3 = add <8 x i8> %2, %2
  ret <8 x i8> %3
}

define fp128 @test_f128_v2f64(<2 x double> %p) {
; SOFT-LABEL: test_f128_v2f64:
; SOFT: @ %bb.0:
; SOFT-NEXT: .save {r11, lr}
; SOFT-NEXT: push {r11, lr}
; SOFT-NEXT: .pad #16
; SOFT-NEXT: sub sp, sp, #16
; SOFT-NEXT: vmov d16, r3, r2
; SOFT-NEXT: vmov d17, r1, r0
; SOFT-NEXT: vadd.f64 d19, d16, d16
; SOFT-NEXT: vadd.f64 d18, d17, d17
; SOFT-NEXT: vrev64.32 q8, q9
; SOFT-NEXT: vmov r2, r3, d17
; SOFT-NEXT: vmov r0, r1, d16
; SOFT-NEXT: stm sp, {r0, r1, r2, r3}
; SOFT-NEXT: bl __addtf3
; SOFT-NEXT: add sp, sp, #16
; SOFT-NEXT: pop {r11, pc}
;
; HARD-LABEL: test_f128_v2f64:
; HARD: @ %bb.0:
; HARD-NEXT: .save {r11, lr}
; HARD-NEXT: push {r11, lr}
; HARD-NEXT: .pad #16
; HARD-NEXT: sub sp, sp, #16
; HARD-NEXT: vadd.f64 d17, d1, d1
; HARD-NEXT: vadd.f64 d16, d0, d0
; HARD-NEXT: vrev64.32 q8, q8
; HARD-NEXT: vmov r2, r3, d17
; HARD-NEXT: vmov r0, r1, d16
; HARD-NEXT: stm sp, {r0, r1, r2, r3}
; HARD-NEXT: bl __addtf3
; HARD-NEXT: add sp, sp, #16
; HARD-NEXT: pop {r11, pc}
  %1 = fadd <2 x double> %p, %p
  %2 = bitcast <2 x double> %1 to fp128
  %3 = fadd fp128 %2, %2
  ret fp128 %3
}

define fp128 @test_f128_v2i64(<2 x i64> %p) {
; SOFT-LABEL: test_f128_v2i64:
; SOFT: @ %bb.0:
; SOFT-NEXT: .save {r11, lr}
; SOFT-NEXT: push {r11, lr}
; SOFT-NEXT: .pad #16
; SOFT-NEXT: sub sp, sp, #16
; SOFT-NEXT: vmov d17, r3, r2
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vadd.i64 q8, q8, q8
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vmov r2, r3, d17
; SOFT-NEXT: vmov r0, r1, d16
; SOFT-NEXT: stm sp, {r0, r1, r2, r3}
; SOFT-NEXT: bl __addtf3
; SOFT-NEXT: add sp, sp, #16
; SOFT-NEXT: pop {r11, pc}
;
; HARD-LABEL: test_f128_v2i64:
; HARD: @ %bb.0:
; HARD-NEXT: .save {r11, lr}
; HARD-NEXT: push {r11, lr}
; HARD-NEXT: .pad #16
; HARD-NEXT: sub sp, sp, #16
; HARD-NEXT: vadd.i64 q8, q0, q0
; HARD-NEXT: vrev64.32 q8, q8
; HARD-NEXT: vmov r2, r3, d17
; HARD-NEXT: vmov r0, r1, d16
; HARD-NEXT: stm sp, {r0, r1, r2, r3}
; HARD-NEXT: bl __addtf3
; HARD-NEXT: add sp, sp, #16
; HARD-NEXT: pop {r11, pc}
  %1 = add <2 x i64> %p, %p
  %2 = bitcast <2 x i64> %1 to fp128
  %3 = fadd fp128 %2, %2
  ret fp128 %3
}

define fp128 @test_f128_v4f32(<4 x float> %p) {
; SOFT-LABEL: test_f128_v4f32:
; SOFT: @ %bb.0:
; SOFT-NEXT: .save {r11, lr}
; SOFT-NEXT: push {r11, lr}
; SOFT-NEXT: .pad #16
; SOFT-NEXT: sub sp, sp, #16
; SOFT-NEXT: vmov d17, r3, r2
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vadd.f32 q8, q8, q8
; SOFT-NEXT: vmov r2, r3, d17
; SOFT-NEXT: vmov r0, r1, d16
; SOFT-NEXT: stm sp, {r0, r1, r2, r3}
; SOFT-NEXT: bl __addtf3
; SOFT-NEXT: add sp, sp, #16
; SOFT-NEXT: pop {r11, pc}
;
; HARD-LABEL: test_f128_v4f32:
; HARD: @ %bb.0:
; HARD-NEXT: .save {r11, lr}
; HARD-NEXT: push {r11, lr}
; HARD-NEXT: .pad #16
; HARD-NEXT: sub sp, sp, #16
; HARD-NEXT: vrev64.32 q8, q0
; HARD-NEXT: vadd.f32 q8, q8, q8
; HARD-NEXT: vmov r2, r3, d17
; HARD-NEXT: vmov r0, r1, d16
; HARD-NEXT: stm sp, {r0, r1, r2, r3}
; HARD-NEXT: bl __addtf3
; HARD-NEXT: add sp, sp, #16
; HARD-NEXT: pop {r11, pc}
  %1 = fadd <4 x float> %p, %p
  %2 = bitcast <4 x float> %1 to fp128
  %3 = fadd fp128 %2, %2
  ret fp128 %3
}

define fp128 @test_f128_v4i32(<4 x i32> %p) {
; SOFT-LABEL: test_f128_v4i32:
; SOFT: @ %bb.0:
; SOFT-NEXT: .save {r11, lr}
; SOFT-NEXT: push {r11, lr}
; SOFT-NEXT: .pad #16
; SOFT-NEXT: sub sp, sp, #16
; SOFT-NEXT: vmov d17, r3, r2
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vadd.i32 q8, q8, q8
; SOFT-NEXT: vmov r2, r3, d17
; SOFT-NEXT: vmov r0, r1, d16
; SOFT-NEXT: stm sp, {r0, r1, r2, r3}
; SOFT-NEXT: bl __addtf3
; SOFT-NEXT: add sp, sp, #16
; SOFT-NEXT: pop {r11, pc}
;
; HARD-LABEL: test_f128_v4i32:
; HARD: @ %bb.0:
; HARD-NEXT: .save {r11, lr}
; HARD-NEXT: push {r11, lr}
; HARD-NEXT: .pad #16
; HARD-NEXT: sub sp, sp, #16
; HARD-NEXT: vrev64.32 q8, q0
; HARD-NEXT: vadd.i32 q8, q8, q8
; HARD-NEXT: vmov r2, r3, d17
; HARD-NEXT: vmov r0, r1, d16
; HARD-NEXT: stm sp, {r0, r1, r2, r3}
; HARD-NEXT: bl __addtf3
; HARD-NEXT: add sp, sp, #16
; HARD-NEXT: pop {r11, pc}
  %1 = add <4 x i32> %p, %p
  %2 = bitcast <4 x i32> %1 to fp128
  %3 = fadd fp128 %2, %2
  ret fp128 %3
}

define fp128 @test_f128_v8i16(<8 x i16> %p) {
; SOFT-LABEL: test_f128_v8i16:
; SOFT: @ %bb.0:
; SOFT-NEXT: .save {r11, lr}
; SOFT-NEXT: push {r11, lr}
; SOFT-NEXT: .pad #16
; SOFT-NEXT: sub sp, sp, #16
; SOFT-NEXT: vmov d17, r3, r2
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.16 q8, q8
; SOFT-NEXT: vadd.i16 q8, q8, q8
; SOFT-NEXT: vrev32.16 q8, q8
; SOFT-NEXT: vmov r2, r3, d17
; SOFT-NEXT: vmov r0, r1, d16
; SOFT-NEXT: stm sp, {r0, r1, r2, r3}
; SOFT-NEXT: bl __addtf3
; SOFT-NEXT: add sp, sp, #16
; SOFT-NEXT: pop {r11, pc}
;
; HARD-LABEL: test_f128_v8i16:
; HARD: @ %bb.0:
; HARD-NEXT: .save {r11, lr}
; HARD-NEXT: push {r11, lr}
; HARD-NEXT: .pad #16
; HARD-NEXT: sub sp, sp, #16
; HARD-NEXT: vrev64.16 q8, q0
; HARD-NEXT: vadd.i16 q8, q8, q8
; HARD-NEXT: vrev32.16 q8, q8
; HARD-NEXT: vmov r2, r3, d17
; HARD-NEXT: vmov r0, r1, d16
; HARD-NEXT: stm sp, {r0, r1, r2, r3}
; HARD-NEXT: bl __addtf3
; HARD-NEXT: add sp, sp, #16
; HARD-NEXT: pop {r11, pc}
  %1 = add <8 x i16> %p, %p
  %2 = bitcast <8 x i16> %1 to fp128
  %3 = fadd fp128 %2, %2
  ret fp128 %3
}

define fp128 @test_f128_v16i8(<16 x i8> %p) {
; SOFT-LABEL: test_f128_v16i8:
; SOFT: @ %bb.0:
; SOFT-NEXT: .save {r11, lr}
; SOFT-NEXT: push {r11, lr}
; SOFT-NEXT: .pad #16
; SOFT-NEXT: sub sp, sp, #16
; SOFT-NEXT: vmov d17, r3, r2
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.8 q8, q8
; SOFT-NEXT: vadd.i8 q8, q8, q8
; SOFT-NEXT: vrev32.8 q8, q8
; SOFT-NEXT: vmov r2, r3, d17
; SOFT-NEXT: vmov r0, r1, d16
; SOFT-NEXT: stm sp, {r0, r1, r2, r3}
; SOFT-NEXT: bl __addtf3
; SOFT-NEXT: add sp, sp, #16
; SOFT-NEXT: pop {r11, pc}
;
; HARD-LABEL: test_f128_v16i8:
; HARD: @ %bb.0:
; HARD-NEXT: .save {r11, lr}
; HARD-NEXT: push {r11, lr}
; HARD-NEXT: .pad #16
; HARD-NEXT: sub sp, sp, #16
; HARD-NEXT: vrev64.8 q8, q0
; HARD-NEXT: vadd.i8 q8, q8, q8
; HARD-NEXT: vrev32.8 q8, q8
; HARD-NEXT: vmov r2, r3, d17
; HARD-NEXT: vmov r0, r1, d16
; HARD-NEXT: stm sp, {r0, r1, r2, r3}
; HARD-NEXT: bl __addtf3
; HARD-NEXT: add sp, sp, #16
; HARD-NEXT: pop {r11, pc}
  %1 = add <16 x i8> %p, %p
  %2 = bitcast <16 x i8> %1 to fp128
  %3 = fadd fp128 %2, %2
  ret fp128 %3
}

define <2 x double> @test_v2f64_f128(fp128 %p) {
; SOFT-LABEL: test_v2f64_f128:
; SOFT: @ %bb.0:
; SOFT-NEXT: .save {r11, lr}
; SOFT-NEXT: push {r11, lr}
; SOFT-NEXT: .pad #16
; SOFT-NEXT: sub sp, sp, #16
; SOFT-NEXT: stm sp, {r0, r1, r2, r3}
; SOFT-NEXT: bl __addtf3
; SOFT-NEXT: vmov.32 d17[0], r2
; SOFT-NEXT: vmov.32 d16[0], r0
; SOFT-NEXT: vmov.32 d17[1], r3
; SOFT-NEXT: vmov.32 d16[1], r1
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vadd.f64 d18, d16, d16
; SOFT-NEXT: vadd.f64 d16, d17, d17
; SOFT-NEXT: vmov r1, r0, d18
; SOFT-NEXT: vmov r3, r2, d16
; SOFT-NEXT: add sp, sp, #16
; SOFT-NEXT: pop {r11, pc}
;
; HARD-LABEL: test_v2f64_f128:
; HARD: @ %bb.0:
; HARD-NEXT: .save {r11, lr}
; HARD-NEXT: push {r11, lr}
; HARD-NEXT: .pad #16
; HARD-NEXT: sub sp, sp, #16
; HARD-NEXT: stm sp, {r0, r1, r2, r3}
; HARD-NEXT: bl __addtf3
; HARD-NEXT: vmov.32 d17[0], r2
; HARD-NEXT: vmov.32 d16[0], r0
; HARD-NEXT: vmov.32 d17[1], r3
; HARD-NEXT: vmov.32 d16[1], r1
; HARD-NEXT: vrev64.32 q8, q8
; HARD-NEXT: vadd.f64 d1, d17, d17
; HARD-NEXT: vadd.f64 d0, d16, d16
; HARD-NEXT: add sp, sp, #16
; HARD-NEXT: pop {r11, pc}
  %1 = fadd fp128 %p, %p
  %2 = bitcast fp128 %1 to <2 x double>
  %3 = fadd <2 x double> %2, %2
  ret <2 x double> %3
}

define <2 x double> @test_v2f64_v2i64(<2 x i64> %p) {
; SOFT-LABEL: test_v2f64_v2i64:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d17, r3, r2
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vadd.i64 q8, q8, q8
; SOFT-NEXT: vadd.f64 d18, d16, d16
; SOFT-NEXT: vadd.f64 d16, d17, d17
; SOFT-NEXT: vmov r1, r0, d18
; SOFT-NEXT: vmov r3, r2, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v2f64_v2i64:
; HARD: @ %bb.0:
; HARD-NEXT: vadd.i64 q8, q0, q0
; HARD-NEXT: vadd.f64 d1, d17, d17
; HARD-NEXT: vadd.f64 d0, d16, d16
; HARD-NEXT: bx lr
  %1 = add <2 x i64> %p, %p
  %2 = bitcast <2 x i64> %1 to <2 x double>
  %3 = fadd <2 x double> %2, %2
  ret <2 x double> %3
}

define <2 x double> @test_v2f64_v4f32(<4 x float> %p) {
; SOFT-LABEL: test_v2f64_v4f32:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d17, r3, r2
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vadd.f32 q8, q8, q8
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vadd.f64 d18, d16, d16
; SOFT-NEXT: vadd.f64 d16, d17, d17
; SOFT-NEXT: vmov r1, r0, d18
; SOFT-NEXT: vmov r3, r2, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v2f64_v4f32:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.32 q8, q0
; HARD-NEXT: vadd.f32 q8, q8, q8
; HARD-NEXT: vrev64.32 q8, q8
; HARD-NEXT: vadd.f64 d1, d17, d17
; HARD-NEXT: vadd.f64 d0, d16, d16
; HARD-NEXT: bx lr
  %1 = fadd <4 x float> %p, %p
  %2 = bitcast <4 x float> %1 to <2 x double>
  %3 = fadd <2 x double> %2, %2
  ret <2 x double> %3
}

define <2 x double> @test_v2f64_v4i32(<4 x i32> %p) {
; SOFT-LABEL: test_v2f64_v4i32:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d17, r3, r2
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vadd.i32 q8, q8, q8
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vadd.f64 d18, d16, d16
; SOFT-NEXT: vadd.f64 d16, d17, d17
; SOFT-NEXT: vmov r1, r0, d18
; SOFT-NEXT: vmov r3, r2, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v2f64_v4i32:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.32 q8, q0
; HARD-NEXT: vadd.i32 q8, q8, q8
; HARD-NEXT: vrev64.32 q8, q8
; HARD-NEXT: vadd.f64 d1, d17, d17
; HARD-NEXT: vadd.f64 d0, d16, d16
; HARD-NEXT: bx lr
  %1 = add <4 x i32> %p, %p
  %2 = bitcast <4 x i32> %1 to <2 x double>
  %3 = fadd <2 x double> %2, %2
  ret <2 x double> %3
}

define <2 x double> @test_v2f64_v8i16(<8 x i16> %p) {
; SOFT-LABEL: test_v2f64_v8i16:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d17, r3, r2
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.16 q8, q8
; SOFT-NEXT: vadd.i16 q8, q8, q8
; SOFT-NEXT: vrev64.16 q8, q8
; SOFT-NEXT: vadd.f64 d18, d16, d16
; SOFT-NEXT: vadd.f64 d16, d17, d17
; SOFT-NEXT: vmov r1, r0, d18
; SOFT-NEXT: vmov r3, r2, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v2f64_v8i16:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.16 q8, q0
; HARD-NEXT: vadd.i16 q8, q8, q8
; HARD-NEXT: vrev64.16 q8, q8
; HARD-NEXT: vadd.f64 d1, d17, d17
; HARD-NEXT: vadd.f64 d0, d16, d16
; HARD-NEXT: bx lr
  %1 = add <8 x i16> %p, %p
  %2 = bitcast <8 x i16> %1 to <2 x double>
  %3 = fadd <2 x double> %2, %2
  ret <2 x double> %3
}

define <2 x double> @test_v2f64_v16i8(<16 x i8> %p) {
; SOFT-LABEL: test_v2f64_v16i8:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d17, r3, r2
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.8 q8, q8
; SOFT-NEXT: vadd.i8 q8, q8, q8
; SOFT-NEXT: vrev64.8 q8, q8
; SOFT-NEXT: vadd.f64 d18, d16, d16
; SOFT-NEXT: vadd.f64 d16, d17, d17
; SOFT-NEXT: vmov r1, r0, d18
; SOFT-NEXT: vmov r3, r2, d16
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v2f64_v16i8:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.8 q8, q0
; HARD-NEXT: vadd.i8 q8, q8, q8
; HARD-NEXT: vrev64.8 q8, q8
; HARD-NEXT: vadd.f64 d1, d17, d17
; HARD-NEXT: vadd.f64 d0, d16, d16
; HARD-NEXT: bx lr
  %1 = add <16 x i8> %p, %p
  %2 = bitcast <16 x i8> %1 to <2 x double>
  %3 = fadd <2 x double> %2, %2
  ret <2 x double> %3
}

define <2 x i64> @test_v2i64_f128(fp128 %p) {
; SOFT-LABEL: test_v2i64_f128:
; SOFT: @ %bb.0:
; SOFT-NEXT: .save {r11, lr}
; SOFT-NEXT: push {r11, lr}
; SOFT-NEXT: .pad #16
; SOFT-NEXT: sub sp, sp, #16
; SOFT-NEXT: stm sp, {r0, r1, r2, r3}
; SOFT-NEXT: bl __addtf3
; SOFT-NEXT: vmov.32 d17[0], r2
; SOFT-NEXT: vmov.32 d16[0], r0
; SOFT-NEXT: vmov.32 d17[1], r3
; SOFT-NEXT: vmov.32 d16[1], r1
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vadd.i64 q8, q8, q8
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: vmov r3, r2, d17
; SOFT-NEXT: add sp, sp, #16
; SOFT-NEXT: pop {r11, pc}
;
; HARD-LABEL: test_v2i64_f128:
; HARD: @ %bb.0:
; HARD-NEXT: .save {r11, lr}
; HARD-NEXT: push {r11, lr}
; HARD-NEXT: .pad #16
; HARD-NEXT: sub sp, sp, #16
; HARD-NEXT: stm sp, {r0, r1, r2, r3}
; HARD-NEXT: bl __addtf3
; HARD-NEXT: vmov.32 d17[0], r2
; HARD-NEXT: vmov.32 d16[0], r0
; HARD-NEXT: vmov.32 d17[1], r3
; HARD-NEXT: vmov.32 d16[1], r1
; HARD-NEXT: vrev64.32 q8, q8
; HARD-NEXT: vadd.i64 q0, q8, q8
; HARD-NEXT: add sp, sp, #16
; HARD-NEXT: pop {r11, pc}
  %1 = fadd fp128 %p, %p
  %2 = bitcast fp128 %1 to <2 x i64>
  %3 = add <2 x i64> %2, %2
  ret <2 x i64> %3
}

define <2 x i64> @test_v2i64_v2f64(<2 x double> %p) {
; SOFT-LABEL: test_v2i64_v2f64:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r3, r2
; SOFT-NEXT: vmov d17, r1, r0
; SOFT-NEXT: vadd.f64 d19, d16, d16
; SOFT-NEXT: vadd.f64 d18, d17, d17
; SOFT-NEXT: vadd.i64 q8, q9, q9
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: vmov r3, r2, d17
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v2i64_v2f64:
; HARD: @ %bb.0:
; HARD-NEXT: vadd.f64 d17, d1, d1
; HARD-NEXT: vadd.f64 d16, d0, d0
; HARD-NEXT: vadd.i64 q0, q8, q8
; HARD-NEXT: bx lr
  %1 = fadd <2 x double> %p, %p
  %2 = bitcast <2 x double> %1 to <2 x i64>
  %3 = add <2 x i64> %2, %2
  ret <2 x i64> %3
}

define <2 x i64> @test_v2i64_v4f32(<4 x float> %p) {
; SOFT-LABEL: test_v2i64_v4f32:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d17, r3, r2
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vadd.f32 q8, q8, q8
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vadd.i64 q8, q8, q8
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: vmov r3, r2, d17
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v2i64_v4f32:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.32 q8, q0
; HARD-NEXT: vadd.f32 q8, q8, q8
; HARD-NEXT: vrev64.32 q8, q8
; HARD-NEXT: vadd.i64 q0, q8, q8
; HARD-NEXT: bx lr
  %1 = fadd <4 x float> %p, %p
  %2 = bitcast <4 x float> %1 to <2 x i64>
  %3 = add <2 x i64> %2, %2
  ret <2 x i64> %3
}

define <2 x i64> @test_v2i64_v4i32(<4 x i32> %p) {
; SOFT-LABEL: test_v2i64_v4i32:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d17, r3, r2
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vadd.i32 q8, q8, q8
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vadd.i64 q8, q8, q8
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: vmov r3, r2, d17
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v2i64_v4i32:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.32 q8, q0
; HARD-NEXT: vadd.i32 q8, q8, q8
; HARD-NEXT: vrev64.32 q8, q8
; HARD-NEXT: vadd.i64 q0, q8, q8
; HARD-NEXT: bx lr
  %1 = add <4 x i32> %p, %p
  %2 = bitcast <4 x i32> %1 to <2 x i64>
  %3 = add <2 x i64> %2, %2
  ret <2 x i64> %3
}

define <2 x i64> @test_v2i64_v8i16(<8 x i16> %p) {
; SOFT-LABEL: test_v2i64_v8i16:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d17, r3, r2
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.16 q8, q8
; SOFT-NEXT: vadd.i16 q8, q8, q8
; SOFT-NEXT: vrev64.16 q8, q8
; SOFT-NEXT: vadd.i64 q8, q8, q8
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: vmov r3, r2, d17
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v2i64_v8i16:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.16 q8, q0
; HARD-NEXT: vadd.i16 q8, q8, q8
; HARD-NEXT: vrev64.16 q8, q8
; HARD-NEXT: vadd.i64 q0, q8, q8
; HARD-NEXT: bx lr
  %1 = add <8 x i16> %p, %p
  %2 = bitcast <8 x i16> %1 to <2 x i64>
  %3 = add <2 x i64> %2, %2
  ret <2 x i64> %3
}

define <2 x i64> @test_v2i64_v16i8(<16 x i8> %p) {
; SOFT-LABEL: test_v2i64_v16i8:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d17, r3, r2
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.8 q8, q8
; SOFT-NEXT: vadd.i8 q8, q8, q8
; SOFT-NEXT: vrev64.8 q8, q8
; SOFT-NEXT: vadd.i64 q8, q8, q8
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: vmov r3, r2, d17
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v2i64_v16i8:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.8 q8, q0
; HARD-NEXT: vadd.i8 q8, q8, q8
; HARD-NEXT: vrev64.8 q8, q8
; HARD-NEXT: vadd.i64 q0, q8, q8
; HARD-NEXT: bx lr
  %1 = add <16 x i8> %p, %p
  %2 = bitcast <16 x i8> %1 to <2 x i64>
  %3 = add <2 x i64> %2, %2
  ret <2 x i64> %3
}

define <4 x float> @test_v4f32_f128(fp128 %p) {
; SOFT-LABEL: test_v4f32_f128:
; SOFT: @ %bb.0:
; SOFT-NEXT: .save {r11, lr}
; SOFT-NEXT: push {r11, lr}
; SOFT-NEXT: .pad #16
; SOFT-NEXT: sub sp, sp, #16
; SOFT-NEXT: stm sp, {r0, r1, r2, r3}
; SOFT-NEXT: bl __addtf3
; SOFT-NEXT: vmov.32 d17[0], r2
; SOFT-NEXT: vmov.32 d16[0], r0
; SOFT-NEXT: vmov.32 d17[1], r3
; SOFT-NEXT: vmov.32 d16[1], r1
; SOFT-NEXT: vadd.f32 q8, q8, q8
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: vmov r3, r2, d17
; SOFT-NEXT: add sp, sp, #16
; SOFT-NEXT: pop {r11, pc}
;
; HARD-LABEL: test_v4f32_f128:
; HARD: @ %bb.0:
; HARD-NEXT: .save {r11, lr}
; HARD-NEXT: push {r11, lr}
; HARD-NEXT: .pad #16
; HARD-NEXT: sub sp, sp, #16
; HARD-NEXT: stm sp, {r0, r1, r2, r3}
; HARD-NEXT: bl __addtf3
; HARD-NEXT: vmov.32 d17[0], r2
; HARD-NEXT: vmov.32 d16[0], r0
; HARD-NEXT: vmov.32 d17[1], r3
; HARD-NEXT: vmov.32 d16[1], r1
; HARD-NEXT: vadd.f32 q8, q8, q8
; HARD-NEXT: vrev64.32 q0, q8
; HARD-NEXT: add sp, sp, #16
; HARD-NEXT: pop {r11, pc}
  %1 = fadd fp128 %p, %p
  %2 = bitcast fp128 %1 to <4 x float>
  %3 = fadd <4 x float> %2, %2
  ret <4 x float> %3
}

define <4 x float> @test_v4f32_v2f64(<2 x double> %p) {
; SOFT-LABEL: test_v4f32_v2f64:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r3, r2
; SOFT-NEXT: vmov d17, r1, r0
; SOFT-NEXT: vadd.f64 d19, d16, d16
; SOFT-NEXT: vadd.f64 d18, d17, d17
; SOFT-NEXT: vrev64.32 q8, q9
; SOFT-NEXT: vadd.f32 q8, q8, q8
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: vmov r3, r2, d17
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v4f32_v2f64:
; HARD: @ %bb.0:
; HARD-NEXT: vadd.f64 d17, d1, d1
; HARD-NEXT: vadd.f64 d16, d0, d0
; HARD-NEXT: vrev64.32 q8, q8
; HARD-NEXT: vadd.f32 q8, q8, q8
; HARD-NEXT: vrev64.32 q0, q8
; HARD-NEXT: bx lr
  %1 = fadd <2 x double> %p, %p
  %2 = bitcast <2 x double> %1 to <4 x float>
  %3 = fadd <4 x float> %2, %2
  ret <4 x float> %3
}

define <4 x float> @test_v4f32_v2i64(<2 x i64> %p) {
; SOFT-LABEL: test_v4f32_v2i64:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d17, r3, r2
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vadd.i64 q8, q8, q8
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vadd.f32 q8, q8, q8
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: vmov r3, r2, d17
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v4f32_v2i64:
; HARD: @ %bb.0:
; HARD-NEXT: vadd.i64 q8, q0, q0
; HARD-NEXT: vrev64.32 q8, q8
; HARD-NEXT: vadd.f32 q8, q8, q8
; HARD-NEXT: vrev64.32 q0, q8
; HARD-NEXT: bx lr
  %1 = add <2 x i64> %p, %p
  %2 = bitcast <2 x i64> %1 to <4 x float>
  %3 = fadd <4 x float> %2, %2
  ret <4 x float> %3
}

define <4 x float> @test_v4f32_v4i32(<4 x i32> %p) {
; SOFT-LABEL: test_v4f32_v4i32:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d17, r3, r2
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vadd.i32 q8, q8, q8
; SOFT-NEXT: vadd.f32 q8, q8, q8
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: vmov r3, r2, d17
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v4f32_v4i32:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.32 q8, q0
; HARD-NEXT: vadd.i32 q8, q8, q8
; HARD-NEXT: vadd.f32 q8, q8, q8
; HARD-NEXT: vrev64.32 q0, q8
; HARD-NEXT: bx lr
  %1 = add <4 x i32> %p, %p
  %2 = bitcast <4 x i32> %1 to <4 x float>
  %3 = fadd <4 x float> %2, %2
  ret <4 x float> %3
}

define <4 x float> @test_v4f32_v8i16(<8 x i16> %p) {
; SOFT-LABEL: test_v4f32_v8i16:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d17, r3, r2
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.16 q8, q8
; SOFT-NEXT: vadd.i16 q8, q8, q8
; SOFT-NEXT: vrev32.16 q8, q8
; SOFT-NEXT: vadd.f32 q8, q8, q8
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: vmov r3, r2, d17
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v4f32_v8i16:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.16 q8, q0
; HARD-NEXT: vadd.i16 q8, q8, q8
; HARD-NEXT: vrev32.16 q8, q8
; HARD-NEXT: vadd.f32 q8, q8, q8
; HARD-NEXT: vrev64.32 q0, q8
; HARD-NEXT: bx lr
  %1 = add <8 x i16> %p, %p
  %2 = bitcast <8 x i16> %1 to <4 x float>
  %3 = fadd <4 x float> %2, %2
  ret <4 x float> %3
}

define <4 x float> @test_v4f32_v16i8(<16 x i8> %p) {
; SOFT-LABEL: test_v4f32_v16i8:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d17, r3, r2
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.8 q8, q8
; SOFT-NEXT: vadd.i8 q8, q8, q8
; SOFT-NEXT: vrev32.8 q8, q8
; SOFT-NEXT: vadd.f32 q8, q8, q8
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: vmov r3, r2, d17
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v4f32_v16i8:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.8 q8, q0
; HARD-NEXT: vadd.i8 q8, q8, q8
; HARD-NEXT: vrev32.8 q8, q8
; HARD-NEXT: vadd.f32 q8, q8, q8
; HARD-NEXT: vrev64.32 q0, q8
; HARD-NEXT: bx lr
  %1 = add <16 x i8> %p, %p
  %2 = bitcast <16 x i8> %1 to <4 x float>
  %3 = fadd <4 x float> %2, %2
  ret <4 x float> %3
}

define <4 x i32> @test_v4i32_f128(fp128 %p) {
; SOFT-LABEL: test_v4i32_f128:
; SOFT: @ %bb.0:
; SOFT-NEXT: .save {r11, lr}
; SOFT-NEXT: push {r11, lr}
; SOFT-NEXT: .pad #16
; SOFT-NEXT: sub sp, sp, #16
; SOFT-NEXT: stm sp, {r0, r1, r2, r3}
; SOFT-NEXT: bl __addtf3
; SOFT-NEXT: vmov.32 d17[0], r2
; SOFT-NEXT: vmov.32 d16[0], r0
; SOFT-NEXT: vmov.32 d17[1], r3
; SOFT-NEXT: vmov.32 d16[1], r1
; SOFT-NEXT: vadd.i32 q8, q8, q8
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: vmov r3, r2, d17
; SOFT-NEXT: add sp, sp, #16
; SOFT-NEXT: pop {r11, pc}
;
; HARD-LABEL: test_v4i32_f128:
; HARD: @ %bb.0:
; HARD-NEXT: .save {r11, lr}
; HARD-NEXT: push {r11, lr}
; HARD-NEXT: .pad #16
; HARD-NEXT: sub sp, sp, #16
; HARD-NEXT: stm sp, {r0, r1, r2, r3}
; HARD-NEXT: bl __addtf3
; HARD-NEXT: vmov.32 d17[0], r2
; HARD-NEXT: vmov.32 d16[0], r0
; HARD-NEXT: vmov.32 d17[1], r3
; HARD-NEXT: vmov.32 d16[1], r1
; HARD-NEXT: vadd.i32 q8, q8, q8
; HARD-NEXT: vrev64.32 q0, q8
; HARD-NEXT: add sp, sp, #16
; HARD-NEXT: pop {r11, pc}
  %1 = fadd fp128 %p, %p
  %2 = bitcast fp128 %1 to <4 x i32>
  %3 = add <4 x i32> %2, %2
  ret <4 x i32> %3
}

define <4 x i32> @test_v4i32_v2f64(<2 x double> %p) {
; SOFT-LABEL: test_v4i32_v2f64:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r3, r2
; SOFT-NEXT: vmov d17, r1, r0
; SOFT-NEXT: vadd.f64 d19, d16, d16
; SOFT-NEXT: vadd.f64 d18, d17, d17
; SOFT-NEXT: vrev64.32 q8, q9
; SOFT-NEXT: vadd.i32 q8, q8, q8
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: vmov r3, r2, d17
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v4i32_v2f64:
; HARD: @ %bb.0:
; HARD-NEXT: vadd.f64 d17, d1, d1
; HARD-NEXT: vadd.f64 d16, d0, d0
; HARD-NEXT: vrev64.32 q8, q8
; HARD-NEXT: vadd.i32 q8, q8, q8
; HARD-NEXT: vrev64.32 q0, q8
; HARD-NEXT: bx lr
  %1 = fadd <2 x double> %p, %p
  %2 = bitcast <2 x double> %1 to <4 x i32>
  %3 = add <4 x i32> %2, %2
  ret <4 x i32> %3
}

define <4 x i32> @test_v4i32_v2i64(<2 x i64> %p) {
; SOFT-LABEL: test_v4i32_v2i64:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d17, r3, r2
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vadd.i64 q8, q8, q8
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vadd.i32 q8, q8, q8
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: vmov r3, r2, d17
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v4i32_v2i64:
; HARD: @ %bb.0:
; HARD-NEXT: vadd.i64 q8, q0, q0
; HARD-NEXT: vrev64.32 q8, q8
; HARD-NEXT: vadd.i32 q8, q8, q8
; HARD-NEXT: vrev64.32 q0, q8
; HARD-NEXT: bx lr
  %1 = add <2 x i64> %p, %p
  %2 = bitcast <2 x i64> %1 to <4 x i32>
  %3 = add <4 x i32> %2, %2
  ret <4 x i32> %3
}

define <4 x i32> @test_v4i32_v4f32(<4 x float> %p) {
; SOFT-LABEL: test_v4i32_v4f32:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d17, r3, r2
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vadd.f32 q8, q8, q8
; SOFT-NEXT: vadd.i32 q8, q8, q8
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: vmov r3, r2, d17
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v4i32_v4f32:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.32 q8, q0
; HARD-NEXT: vadd.f32 q8, q8, q8
; HARD-NEXT: vadd.i32 q8, q8, q8
; HARD-NEXT: vrev64.32 q0, q8
; HARD-NEXT: bx lr
  %1 = fadd <4 x float> %p, %p
  %2 = bitcast <4 x float> %1 to <4 x i32>
  %3 = add <4 x i32> %2, %2
  ret <4 x i32> %3
}

define <4 x i32> @test_v4i32_v8i16(<8 x i16> %p) {
; SOFT-LABEL: test_v4i32_v8i16:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d17, r3, r2
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.16 q8, q8
; SOFT-NEXT: vadd.i16 q8, q8, q8
; SOFT-NEXT: vrev32.16 q8, q8
; SOFT-NEXT: vadd.i32 q8, q8, q8
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: vmov r3, r2, d17
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v4i32_v8i16:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.16 q8, q0
; HARD-NEXT: vadd.i16 q8, q8, q8
; HARD-NEXT: vrev32.16 q8, q8
; HARD-NEXT: vadd.i32 q8, q8, q8
; HARD-NEXT: vrev64.32 q0, q8
; HARD-NEXT: bx lr
  %1 = add <8 x i16> %p, %p
  %2 = bitcast <8 x i16> %1 to <4 x i32>
  %3 = add <4 x i32> %2, %2
  ret <4 x i32> %3
}

define <4 x i32> @test_v4i32_v16i8(<16 x i8> %p) {
; SOFT-LABEL: test_v4i32_v16i8:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d17, r3, r2
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.8 q8, q8
; SOFT-NEXT: vadd.i8 q8, q8, q8
; SOFT-NEXT: vrev32.8 q8, q8
; SOFT-NEXT: vadd.i32 q8, q8, q8
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: vmov r3, r2, d17
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v4i32_v16i8:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.8 q8, q0
; HARD-NEXT: vadd.i8 q8, q8, q8
; HARD-NEXT: vrev32.8 q8, q8
; HARD-NEXT: vadd.i32 q8, q8, q8
; HARD-NEXT: vrev64.32 q0, q8
; HARD-NEXT: bx lr
  %1 = add <16 x i8> %p, %p
  %2 = bitcast <16 x i8> %1 to <4 x i32>
  %3 = add <4 x i32> %2, %2
  ret <4 x i32> %3
}

define <8 x i16> @test_v8i16_f128(fp128 %p) {
; SOFT-LABEL: test_v8i16_f128:
; SOFT: @ %bb.0:
; SOFT-NEXT: .save {r11, lr}
; SOFT-NEXT: push {r11, lr}
; SOFT-NEXT: .pad #16
; SOFT-NEXT: sub sp, sp, #16
; SOFT-NEXT: stm sp, {r0, r1, r2, r3}
; SOFT-NEXT: bl __addtf3
; SOFT-NEXT: vmov.32 d17[0], r2
; SOFT-NEXT: vmov.32 d16[0], r0
; SOFT-NEXT: vmov.32 d17[1], r3
; SOFT-NEXT: vmov.32 d16[1], r1
; SOFT-NEXT: vrev32.16 q8, q8
; SOFT-NEXT: vadd.i16 q8, q8, q8
; SOFT-NEXT: vrev64.16 q8, q8
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: vmov r3, r2, d17
; SOFT-NEXT: add sp, sp, #16
; SOFT-NEXT: pop {r11, pc}
;
; HARD-LABEL: test_v8i16_f128:
; HARD: @ %bb.0:
; HARD-NEXT: .save {r11, lr}
; HARD-NEXT: push {r11, lr}
; HARD-NEXT: .pad #16
; HARD-NEXT: sub sp, sp, #16
; HARD-NEXT: stm sp, {r0, r1, r2, r3}
; HARD-NEXT: bl __addtf3
; HARD-NEXT: vmov.32 d17[0], r2
; HARD-NEXT: vmov.32 d16[0], r0
; HARD-NEXT: vmov.32 d17[1], r3
; HARD-NEXT: vmov.32 d16[1], r1
; HARD-NEXT: vrev32.16 q8, q8
; HARD-NEXT: vadd.i16 q8, q8, q8
; HARD-NEXT: vrev64.16 q0, q8
; HARD-NEXT: add sp, sp, #16
; HARD-NEXT: pop {r11, pc}
  %1 = fadd fp128 %p, %p
define <8 x i16> @test_v8i16_v2f64(<2 x double> %p) {
; SOFT-LABEL: test_v8i16_v2f64:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r3, r2
; SOFT-NEXT: vmov d17, r1, r0
; SOFT-NEXT: vadd.f64 d19, d16, d16
; SOFT-NEXT: vadd.f64 d18, d17, d17
; SOFT-NEXT: vrev64.16 q8, q9
; SOFT-NEXT: vadd.i16 q8, q8, q8
; SOFT-NEXT: vrev64.16 q8, q8
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: vmov r3, r2, d17
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v8i16_v2f64:
; HARD: @ %bb.0:
; HARD-NEXT: vadd.f64 d17, d1, d1
; HARD-NEXT: vadd.f64 d16, d0, d0
; HARD-NEXT: vrev64.16 q8, q8
; HARD-NEXT: vadd.i16 q8, q8, q8
; HARD-NEXT: vrev64.16 q0, q8
; HARD-NEXT: bx lr
  %1 = fadd <2 x double> %p, %p
  %2 = bitcast <2 x double> %1 to <8 x i16>
  %3 = add <8 x i16> %2, %2
  ret <8 x i16> %3
}

define <8 x i16> @test_v8i16_v2i64(<2 x i64> %p) {
; SOFT-LABEL: test_v8i16_v2i64:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d17, r3, r2
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vadd.i64 q8, q8, q8
; SOFT-NEXT: vrev64.16 q8, q8
; SOFT-NEXT: vadd.i16 q8, q8, q8
; SOFT-NEXT: vrev64.16 q8, q8
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: vmov r3, r2, d17
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v8i16_v2i64:
; HARD: @ %bb.0:
; HARD-NEXT: vadd.i64 q8, q0, q0
; HARD-NEXT: vrev64.16 q8, q8
; HARD-NEXT: vadd.i16 q8, q8, q8
; HARD-NEXT: vrev64.16 q0, q8
; HARD-NEXT: bx lr
  %1 = add <2 x i64> %p, %p
  %2 = bitcast <2 x i64> %1 to <8 x i16>
  %3 = add <8 x i16> %2, %2
  ret <8 x i16> %3
}

define <8 x i16> @test_v8i16_v4f32(<4 x float> %p) {
; SOFT-LABEL: test_v8i16_v4f32:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d17, r3, r2
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vadd.f32 q8, q8, q8
; SOFT-NEXT: vrev32.16 q8, q8
; SOFT-NEXT: vadd.i16 q8, q8, q8
; SOFT-NEXT: vrev64.16 q8, q8
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: vmov r3, r2, d17
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v8i16_v4f32:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.32 q8, q0
; HARD-NEXT: vadd.f32 q8, q8, q8
; HARD-NEXT: vrev32.16 q8, q8
; HARD-NEXT: vadd.i16 q8, q8, q8
; HARD-NEXT: vrev64.16 q0, q8
; HARD-NEXT: bx lr
  %1 = fadd <4 x float> %p, %p
  %2 = bitcast <4 x float> %1 to <8 x i16>
  %3 = add <8 x i16> %2, %2
  ret <8 x i16> %3
}

define <8 x i16> @test_v8i16_v4i32(<4 x i32> %p) {
; SOFT-LABEL: test_v8i16_v4i32:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d17, r3, r2
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vadd.i32 q8, q8, q8
; SOFT-NEXT: vrev32.16 q8, q8
; SOFT-NEXT: vadd.i16 q8, q8, q8
; SOFT-NEXT: vrev64.16 q8, q8
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: vmov r3, r2, d17
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v8i16_v4i32:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.32 q8, q0
; HARD-NEXT: vadd.i32 q8, q8, q8
; HARD-NEXT: vrev32.16 q8, q8
; HARD-NEXT: vadd.i16 q8, q8, q8
; HARD-NEXT: vrev64.16 q0, q8
; HARD-NEXT: bx lr
  %1 = add <4 x i32> %p, %p
  %2 = bitcast <4 x i32> %1 to <8 x i16>
  %3 = add <8 x i16> %2, %2
  ret <8 x i16> %3
}
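; i8 is the narrowest element type, so the <16 x i8>-to-<8 x i16> bitcast
; reduces to a vrev16.8, swapping the two bytes inside each halfword lane.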
define <8 x i16> @test_v8i16_v16i8(<16 x i8> %p) {
; SOFT-LABEL: test_v8i16_v16i8:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d17, r3, r2
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.8 q8, q8
; SOFT-NEXT: vadd.i8 q8, q8, q8
; SOFT-NEXT: vrev16.8 q8, q8
; SOFT-NEXT: vadd.i16 q8, q8, q8
; SOFT-NEXT: vrev64.16 q8, q8
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: vmov r3, r2, d17
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v8i16_v16i8:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.8 q8, q0
; HARD-NEXT: vadd.i8 q8, q8, q8
; HARD-NEXT: vrev16.8 q8, q8
; HARD-NEXT: vadd.i16 q8, q8, q8
; HARD-NEXT: vrev64.16 q0, q8
; HARD-NEXT: bx lr
  %1 = add <16 x i8> %p, %p
  %2 = bitcast <16 x i8> %1 to <8 x i16>
  %3 = add <8 x i16> %2, %2
  ret <8 x i16> %3
}

define <16 x i8> @test_v16i8_f128(fp128 %p) {
; SOFT-LABEL: test_v16i8_f128:
; SOFT: @ %bb.0:
; SOFT-NEXT: .save {r11, lr}
; SOFT-NEXT: push {r11, lr}
; SOFT-NEXT: .pad #16
; SOFT-NEXT: sub sp, sp, #16
; SOFT-NEXT: stm sp, {r0, r1, r2, r3}
; SOFT-NEXT: bl __addtf3
; SOFT-NEXT: vmov.32 d17[0], r2
; SOFT-NEXT: vmov.32 d16[0], r0
; SOFT-NEXT: vmov.32 d17[1], r3
; SOFT-NEXT: vmov.32 d16[1], r1
; SOFT-NEXT: vrev32.8 q8, q8
; SOFT-NEXT: vadd.i8 q8, q8, q8
; SOFT-NEXT: vrev64.8 q8, q8
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: vmov r3, r2, d17
; SOFT-NEXT: add sp, sp, #16
; SOFT-NEXT: pop {r11, pc}
;
; HARD-LABEL: test_v16i8_f128:
; HARD: @ %bb.0:
; HARD-NEXT: .save {r11, lr}
; HARD-NEXT: push {r11, lr}
; HARD-NEXT: .pad #16
; HARD-NEXT: sub sp, sp, #16
; HARD-NEXT: stm sp, {r0, r1, r2, r3}
; HARD-NEXT: bl __addtf3
; HARD-NEXT: vmov.32 d17[0], r2
; HARD-NEXT: vmov.32 d16[0], r0
; HARD-NEXT: vmov.32 d17[1], r3
; HARD-NEXT: vmov.32 d16[1], r1
; HARD-NEXT: vrev32.8 q8, q8
; HARD-NEXT: vadd.i8 q8, q8, q8
; HARD-NEXT: vrev64.8 q0, q8
; HARD-NEXT: add sp, sp, #16
; HARD-NEXT: pop {r11, pc}
  %1 = fadd fp128 %p, %p
  %2 = bitcast fp128 %1 to <16 x i8>
  %3 = add <16 x i8> %2, %2
  ret <16 x i8> %3
}

define <16 x i8> @test_v16i8_v2f64(<2 x double> %p) {
; SOFT-LABEL: test_v16i8_v2f64:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d16, r3, r2
; SOFT-NEXT: vmov d17, r1, r0
; SOFT-NEXT: vadd.f64 d19, d16, d16
; SOFT-NEXT: vadd.f64 d18, d17, d17
; SOFT-NEXT: vrev64.8 q8, q9
; SOFT-NEXT: vadd.i8 q8, q8, q8
; SOFT-NEXT: vrev64.8 q8, q8
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: vmov r3, r2, d17
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v16i8_v2f64:
; HARD: @ %bb.0:
; HARD-NEXT: vadd.f64 d17, d1, d1
; HARD-NEXT: vadd.f64 d16, d0, d0
; HARD-NEXT: vrev64.8 q8, q8
; HARD-NEXT: vadd.i8 q8, q8, q8
; HARD-NEXT: vrev64.8 q0, q8
; HARD-NEXT: bx lr
  %1 = fadd <2 x double> %p, %p
  %2 = bitcast <2 x double> %1 to <16 x i8>
  %3 = add <16 x i8> %2, %2
  ret <16 x i8> %3
}

define <16 x i8> @test_v16i8_v2i64(<2 x i64> %p) {
; SOFT-LABEL: test_v16i8_v2i64:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d17, r3, r2
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vadd.i64 q8, q8, q8
; SOFT-NEXT: vrev64.8 q8, q8
; SOFT-NEXT: vadd.i8 q8, q8, q8
; SOFT-NEXT: vrev64.8 q8, q8
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: vmov r3, r2, d17
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v16i8_v2i64:
; HARD: @ %bb.0:
; HARD-NEXT: vadd.i64 q8, q0, q0
; HARD-NEXT: vrev64.8 q8, q8
; HARD-NEXT: vadd.i8 q8, q8, q8
; HARD-NEXT: vrev64.8 q0, q8
; HARD-NEXT: bx lr
  %1 = add <2 x i64> %p, %p
  %2 = bitcast <2 x i64> %1 to <16 x i8>
  %3 = add <16 x i8> %2, %2
  ret <16 x i8> %3
}
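; The remaining <16 x i8> results use byte-granularity reversals: vrev32.8
; after a 32-bit source op and vrev16.8 after a 16-bit source op, with a
; final vrev64.8 putting the returned bytes back into the order the caller
; expects.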
define <16 x i8> @test_v16i8_v4f32(<4 x float> %p) {
; SOFT-LABEL: test_v16i8_v4f32:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d17, r3, r2
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vadd.f32 q8, q8, q8
; SOFT-NEXT: vrev32.8 q8, q8
; SOFT-NEXT: vadd.i8 q8, q8, q8
; SOFT-NEXT: vrev64.8 q8, q8
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: vmov r3, r2, d17
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v16i8_v4f32:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.32 q8, q0
; HARD-NEXT: vadd.f32 q8, q8, q8
; HARD-NEXT: vrev32.8 q8, q8
; HARD-NEXT: vadd.i8 q8, q8, q8
; HARD-NEXT: vrev64.8 q0, q8
; HARD-NEXT: bx lr
  %1 = fadd <4 x float> %p, %p
  %2 = bitcast <4 x float> %1 to <16 x i8>
  %3 = add <16 x i8> %2, %2
  ret <16 x i8> %3
}

define <16 x i8> @test_v16i8_v4i32(<4 x i32> %p) {
; SOFT-LABEL: test_v16i8_v4i32:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d17, r3, r2
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.32 q8, q8
; SOFT-NEXT: vadd.i32 q8, q8, q8
; SOFT-NEXT: vrev32.8 q8, q8
; SOFT-NEXT: vadd.i8 q8, q8, q8
; SOFT-NEXT: vrev64.8 q8, q8
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: vmov r3, r2, d17
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v16i8_v4i32:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.32 q8, q0
; HARD-NEXT: vadd.i32 q8, q8, q8
; HARD-NEXT: vrev32.8 q8, q8
; HARD-NEXT: vadd.i8 q8, q8, q8
; HARD-NEXT: vrev64.8 q0, q8
; HARD-NEXT: bx lr
  %1 = add <4 x i32> %p, %p
  %2 = bitcast <4 x i32> %1 to <16 x i8>
  %3 = add <16 x i8> %2, %2
  ret <16 x i8> %3
}

define <16 x i8> @test_v16i8_v8i16(<8 x i16> %p) {
; SOFT-LABEL: test_v16i8_v8i16:
; SOFT: @ %bb.0:
; SOFT-NEXT: vmov d17, r3, r2
; SOFT-NEXT: vmov d16, r1, r0
; SOFT-NEXT: vrev64.16 q8, q8
; SOFT-NEXT: vadd.i16 q8, q8, q8
; SOFT-NEXT: vrev16.8 q8, q8
; SOFT-NEXT: vadd.i8 q8, q8, q8
; SOFT-NEXT: vrev64.8 q8, q8
; SOFT-NEXT: vmov r1, r0, d16
; SOFT-NEXT: vmov r3, r2, d17
; SOFT-NEXT: bx lr
;
; HARD-LABEL: test_v16i8_v8i16:
; HARD: @ %bb.0:
; HARD-NEXT: vrev64.16 q8, q0
; HARD-NEXT: vadd.i16 q8, q8, q8
; HARD-NEXT: vrev16.8 q8, q8
; HARD-NEXT: vadd.i8 q8, q8, q8
; HARD-NEXT: vrev64.8 q0, q8
; HARD-NEXT: bx lr
  %1 = add <8 x i16> %p, %p
  %2 = bitcast <8 x i16> %1 to <16 x i8>
  %3 = add <16 x i8> %2, %2
  ret <16 x i8> %3
}