; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple armeb-eabi -mattr v7,neon -float-abi soft %s -o - | FileCheck %s -check-prefix SOFT
; RUN: llc -mtriple armeb-eabi -mattr v7,neon -float-abi hard %s -o - | FileCheck %s -check-prefix HARD
; NOTE(review): Big-endian ARM (armeb) tests for 64-bit scalar/vector values
; passed to and returned from calls, under both soft- and hard-float ABIs.
; Check lines are machine-generated; rerun the script rather than hand-editing.

declare i64 @test_i64_f64_helper(double %p)
define void @test_i64_f64(ptr %p, ptr %q) {
; SOFT-LABEL: test_i64_f64:
; SOFT:         @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_i64_f64_helper
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    strd r0, r1, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_i64_f64:
; HARD:         @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bl test_i64_f64_helper
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    strd r0, r1, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load double, ptr %p
  %2 = fadd double %1, %1
  %3 = call i64 @test_i64_f64_helper(double %2)
  %4 = add i64 %3, %3
  store i64 %4, ptr %q
  ret void
}

declare i64 @test_i64_v1i64_helper(<1 x i64> %p)
define void @test_i64_v1i64(ptr %p, ptr %q) {
; SOFT-LABEL: test_i64_v1i64:
; SOFT:         @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_i64_v1i64_helper
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    strd r0, r1, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_i64_v1i64:
; HARD:         @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vadd.i64 d0, d16, d16
; HARD-NEXT:    bl test_i64_v1i64_helper
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    strd r0, r1, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <1 x i64>, ptr %p
  %2 = add <1 x i64> %1, %1
  %3 = call i64 @test_i64_v1i64_helper(<1 x i64> %2)
  %4 = add i64 %3, %3
  store i64 %4, ptr %q
  ret void
}

declare i64 @test_i64_v2f32_helper(<2 x float> %p)
define void @test_i64_v2f32(ptr %p, ptr %q) {
; SOFT-LABEL: test_i64_v2f32:
; SOFT:         @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_i64_v2f32_helper
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    strd r0, r1, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_i64_v2f32:
; HARD:         @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bl test_i64_v2f32_helper
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    strd r0, r1, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <2 x float>, ptr %p
  %2 = fadd <2 x float> %1, %1
  %3 = call i64 @test_i64_v2f32_helper(<2 x float> %2)
  %4 = add i64 %3, %3
  store i64 %4, ptr %q
  ret void
}

declare i64 @test_i64_v2i32_helper(<2 x i32> %p)
define void @test_i64_v2i32(ptr %p, ptr %q) {
; SOFT-LABEL: test_i64_v2i32:
; SOFT:         @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_i64_v2i32_helper
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    strd r0, r1, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_i64_v2i32:
; HARD:         @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bl test_i64_v2i32_helper
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    strd r0, r1, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <2 x i32>, ptr %p
  %2 = add <2 x i32> %1, %1
  %3 = call i64 @test_i64_v2i32_helper(<2 x i32> %2)
  %4 = add i64 %3, %3
  store i64 %4, ptr %q
  ret void
}

declare i64 @test_i64_v4i16_helper(<4 x i16> %p)
define void @test_i64_v4i16(ptr %p, ptr %q) {
; SOFT-LABEL: test_i64_v4i16:
; SOFT:         @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_i64_v4i16_helper
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    strd r0, r1, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_i64_v4i16:
; HARD:         @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.16 d16, d16
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d0, d16
; HARD-NEXT:    bl test_i64_v4i16_helper
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    strd r0, r1, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <4 x i16>, ptr %p
  %2 = add <4 x i16> %1, %1
  %3 = call i64 @test_i64_v4i16_helper(<4 x i16> %2)
  %4 = add i64 %3, %3
  store i64 %4, ptr %q
  ret void
}

declare i64 @test_i64_v8i8_helper(<8 x i8> %p)
define void @test_i64_v8i8(ptr %p, ptr %q) {
; SOFT-LABEL: test_i64_v8i8:
; SOFT:         @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_i64_v8i8_helper
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    strd r0, r1, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_i64_v8i8:
; HARD:         @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.8 d16, d16
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d0, d16
; HARD-NEXT:    bl test_i64_v8i8_helper
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    strd r0, r1, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <8 x i8>, ptr %p
  %2 = add <8 x i8> %1, %1
  %3 = call i64 @test_i64_v8i8_helper(<8 x i8> %2)
  %4 = add i64 %3, %3
  store i64 %4, ptr %q
  ret void
}

declare double @test_f64_i64_helper(i64 %p)
define void @test_f64_i64(ptr %p, ptr %q) {
; SOFT-LABEL: test_f64_i64:
; SOFT:         @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    ldrd r0, r1, [r0]
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    bl test_f64_i64_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_f64_i64:
; HARD:         @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    ldrd r0, r1, [r0]
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    bl test_f64_i64_helper
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load i64, ptr %p
  %2 = add i64 %1, %1
  %3 = call double @test_f64_i64_helper(i64 %2)
  %4 = fadd double %3, %3
  store double %4, ptr %q
  ret void
}

declare double @test_f64_v1i64_helper(<1 x i64> %p)
define void @test_f64_v1i64(ptr %p, ptr %q) {
; SOFT-LABEL: test_f64_v1i64:
; SOFT:         @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_f64_v1i64_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_f64_v1i64:
; HARD:         @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vadd.i64 d0, d16, d16
; HARD-NEXT:    bl test_f64_v1i64_helper
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <1 x i64>, ptr %p
  %2 = add <1 x i64> %1, %1
  %3 = call double @test_f64_v1i64_helper(<1 x i64> %2)
  %4 = fadd double %3, %3
  store double %4, ptr %q
  ret void
}

declare double @test_f64_v2f32_helper(<2 x float> %p)
define void @test_f64_v2f32(ptr %p, ptr %q) {
; SOFT-LABEL: test_f64_v2f32:
; SOFT:         @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_f64_v2f32_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_f64_v2f32:
; HARD:         @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bl test_f64_v2f32_helper
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <2 x float>, ptr %p
  %2 = fadd <2 x float> %1, %1
  %3 = call double @test_f64_v2f32_helper(<2 x float> %2)
  %4 = fadd double %3, %3
  store double %4, ptr %q
  ret void
}

declare double @test_f64_v2i32_helper(<2 x i32> %p)
define void @test_f64_v2i32(ptr %p, ptr %q) {
; SOFT-LABEL: test_f64_v2i32:
; SOFT:         @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_f64_v2i32_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_f64_v2i32:
; HARD:         @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bl test_f64_v2i32_helper
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <2 x i32>, ptr %p
  %2 = add <2 x i32> %1, %1
  %3 = call double @test_f64_v2i32_helper(<2 x i32> %2)
  %4 = fadd double %3, %3
  store double %4, ptr %q
  ret void
}

declare double @test_f64_v4i16_helper(<4 x i16> %p)
define void @test_f64_v4i16(ptr %p, ptr %q) {
; SOFT-LABEL: test_f64_v4i16:
; SOFT:         @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_f64_v4i16_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_f64_v4i16:
; HARD:         @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.16 d16, d16
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d0, d16
; HARD-NEXT:    bl test_f64_v4i16_helper
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <4 x i16>, ptr %p
  %2 = add <4 x i16> %1, %1
  %3 = call double @test_f64_v4i16_helper(<4 x i16> %2)
  %4 = fadd double %3, %3
  store double %4, ptr %q
  ret void
}

declare double @test_f64_v8i8_helper(<8 x i8> %p)
define void @test_f64_v8i8(ptr %p, ptr %q) {
; SOFT-LABEL: test_f64_v8i8:
; SOFT:         @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_f64_v8i8_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_f64_v8i8:
; HARD:         @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.8 d16, d16
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d0, d16
; HARD-NEXT:    bl test_f64_v8i8_helper
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <8 x i8>, ptr %p
  %2 = add <8 x i8> %1, %1
  %3 = call double @test_f64_v8i8_helper(<8 x i8> %2)
  %4 = fadd double %3, %3
  store double %4, ptr %q
  ret void
}

declare <1 x i64> @test_v1i64_i64_helper(i64 %p)
define void @test_v1i64_i64(ptr %p, ptr %q) {
; SOFT-LABEL: test_v1i64_i64:
; SOFT:         @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    ldrd r0, r1, [r0]
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    bl test_v1i64_i64_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v1i64_i64:
; HARD:         @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    ldrd r0, r1, [r0]
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    bl test_v1i64_i64_helper
; HARD-NEXT:    vadd.i64 d16, d0, d0
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load i64, ptr %p
  %2 = add i64 %1, %1
  %3 = call <1 x i64> @test_v1i64_i64_helper(i64 %2)
  %4 = add <1 x i64> %3, %3
  store <1 x i64> %4, ptr %q
  ret void
}

declare <1 x i64> @test_v1i64_f64_helper(double %p)
define void @test_v1i64_f64(ptr %p, ptr %q) {
; SOFT-LABEL: test_v1i64_f64:
; SOFT:         @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v1i64_f64_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v1i64_f64:
; HARD:         @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bl test_v1i64_f64_helper
; HARD-NEXT:    vadd.i64 d16, d0, d0
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load double, ptr %p
  %2 = fadd double %1, %1
  %3 = call <1 x i64> @test_v1i64_f64_helper(double %2)
  %4 = add <1 x i64> %3, %3
  store <1 x i64> %4, ptr %q
  ret void
}

declare <1 x i64> @test_v1i64_v2f32_helper(<2 x float> %p)
define void @test_v1i64_v2f32(ptr %p, ptr %q) {
; SOFT-LABEL: test_v1i64_v2f32:
; SOFT:         @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v1i64_v2f32_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v1i64_v2f32:
; HARD:         @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bl test_v1i64_v2f32_helper
; HARD-NEXT:    vadd.i64 d16, d0, d0
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <2 x float>, ptr %p
  %2 = fadd <2 x float> %1, %1
  %3 = call <1 x i64> @test_v1i64_v2f32_helper(<2 x float> %2)
  %4 = add <1 x i64> %3, %3
  store <1 x i64> %4, ptr %q
  ret void
}

declare <1 x i64> @test_v1i64_v2i32_helper(<2 x i32> %p)
define void @test_v1i64_v2i32(ptr %p, ptr %q) {
; SOFT-LABEL: test_v1i64_v2i32:
; SOFT:         @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v1i64_v2i32_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v1i64_v2i32:
; HARD:         @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bl test_v1i64_v2i32_helper
; HARD-NEXT:    vadd.i64 d16, d0, d0
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <2 x i32>, ptr %p
  %2 = add <2 x i32> %1, %1
  %3 = call <1 x i64> @test_v1i64_v2i32_helper(<2 x i32> %2)
  %4 = add <1 x i64> %3, %3
  store <1 x i64> %4, ptr %q
  ret void
}

declare <1 x i64> @test_v1i64_v4i16_helper(<4 x i16> %p)
define void @test_v1i64_v4i16(ptr %p, ptr %q) {
; SOFT-LABEL: test_v1i64_v4i16:
; SOFT:         @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v1i64_v4i16_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v1i64_v4i16:
; HARD:         @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.16 d16, d16
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d0, d16
; HARD-NEXT:    bl test_v1i64_v4i16_helper
; HARD-NEXT:    vadd.i64 d16, d0, d0
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <4 x i16>, ptr %p
  %2 = add <4 x i16> %1, %1
  %3 = call <1 x i64> @test_v1i64_v4i16_helper(<4 x i16> %2)
  %4 = add <1 x i64> %3, %3
  store <1 x i64> %4, ptr %q
  ret void
}

declare <1 x i64> @test_v1i64_v8i8_helper(<8 x i8> %p)
define void @test_v1i64_v8i8(ptr %p, ptr %q) {
; SOFT-LABEL: test_v1i64_v8i8:
; SOFT:         @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v1i64_v8i8_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v1i64_v8i8:
; HARD:         @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.8 d16, d16
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d0, d16
; HARD-NEXT:    bl test_v1i64_v8i8_helper
; HARD-NEXT:    vadd.i64 d16, d0, d0
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <8 x i8>, ptr %p
  %2 = add <8 x i8> %1, %1
  %3 = call <1 x i64> @test_v1i64_v8i8_helper(<8 x i8> %2)
  %4 = add <1 x i64> %3, %3
  store <1 x i64> %4, ptr %q
  ret void
}

declare <2 x float> @test_v2f32_i64_helper(i64 %p)
define void @test_v2f32_i64(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2f32_i64:
; SOFT:         @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    ldrd r0, r1, [r0]
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    bl test_v2f32_i64_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v2f32_i64:
; HARD:         @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    ldrd r0, r1, [r0]
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    bl test_v2f32_i64_helper
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load i64, ptr %p
  %2 = add i64 %1, %1
  %3 = call <2 x float> @test_v2f32_i64_helper(i64 %2)
  %4 = fadd <2 x float> %3, %3
  store <2 x float> %4, ptr %q
  ret void
}

declare <2 x float> @test_v2f32_f64_helper(double %p)
define void @test_v2f32_f64(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2f32_f64:
; SOFT:         @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v2f32_f64_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v2f32_f64:
; HARD:         @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bl test_v2f32_f64_helper
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load double, ptr %p
  %2 = fadd double %1, %1
  %3 = call <2 x float> @test_v2f32_f64_helper(double %2)
  %4 = fadd <2 x float> %3, %3
  store <2 x float> %4, ptr %q
  ret void
}

declare <2 x float> @test_v2f32_v1i64_helper(<1 x i64> %p)
define void @test_v2f32_v1i64(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2f32_v1i64:
; SOFT:         @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v2f32_v1i64_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v2f32_v1i64:
; HARD:         @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vadd.i64 d0, d16, d16
; HARD-NEXT:    bl test_v2f32_v1i64_helper
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <1 x i64>, ptr %p
  %2 = add <1 x i64> %1, %1
  %3 = call <2 x float> @test_v2f32_v1i64_helper(<1 x i64> %2)
  %4 = fadd <2 x float> %3, %3
  store <2 x float> %4, ptr %q
  ret void
}

declare <2 x float> @test_v2f32_v2i32_helper(<2 x i32> %p)
define void @test_v2f32_v2i32(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2f32_v2i32:
; SOFT:         @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v2f32_v2i32_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v2f32_v2i32:
; HARD:         @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bl test_v2f32_v2i32_helper
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <2 x i32>, ptr %p
  %2 = add <2 x i32> %1, %1
  %3 = call <2 x float> @test_v2f32_v2i32_helper(<2 x i32> %2)
  %4 = fadd <2 x float> %3, %3
  store <2 x float> %4, ptr %q
  ret void
}

declare <2 x float> @test_v2f32_v4i16_helper(<4 x i16> %p)
define void @test_v2f32_v4i16(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2f32_v4i16:
; SOFT:         @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v2f32_v4i16_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v2f32_v4i16:
; HARD:         @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.16 d16, d16
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d0, d16
; HARD-NEXT:    bl test_v2f32_v4i16_helper
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <4 x i16>, ptr %p
  %2 = add <4 x i16> %1, %1
  %3 = call <2 x float> @test_v2f32_v4i16_helper(<4 x i16> %2)
  %4 = fadd <2 x float> %3, %3
  store <2 x float> %4, ptr %q
  ret void
}

declare <2 x float> @test_v2f32_v8i8_helper(<8 x i8> %p)
define void @test_v2f32_v8i8(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2f32_v8i8:
; SOFT:         @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v2f32_v8i8_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v2f32_v8i8:
; HARD:         @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.8 d16, d16
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d0, d16
; HARD-NEXT:    bl test_v2f32_v8i8_helper
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <8 x i8>, ptr %p
  %2 = add <8 x i8> %1, %1
  %3 = call <2 x float> @test_v2f32_v8i8_helper(<8 x i8> %2)
  %4 = fadd <2 x float> %3, %3
  store <2 x float> %4, ptr %q
  ret void
}

declare <2 x i32> @test_v2i32_i64_helper(i64 %p)
define void @test_v2i32_i64(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2i32_i64:
; SOFT:         @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    ldrd r0, r1, [r0]
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    bl test_v2i32_i64_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v2i32_i64:
; HARD:         @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    ldrd r0, r1, [r0]
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    bl test_v2i32_i64_helper
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load i64, ptr %p
  %2 = add i64 %1, %1
  %3 = call <2 x i32> @test_v2i32_i64_helper(i64 %2)
  %4 = add <2 x i32> %3, %3
  store <2 x i32> %4, ptr %q
  ret void
}

declare <2 x i32> @test_v2i32_f64_helper(double %p)
define void @test_v2i32_f64(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2i32_f64:
; SOFT:         @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v2i32_f64_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v2i32_f64:
; HARD:         @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bl test_v2i32_f64_helper
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load double, ptr %p
  %2 = fadd double %1, %1
  %3 = call <2 x i32> @test_v2i32_f64_helper(double %2)
  %4 = add <2 x i32> %3, %3
  store <2 x i32> %4, ptr %q
  ret void
}

declare <2 x i32> @test_v2i32_v1i64_helper(<1 x i64> %p)
define void @test_v2i32_v1i64(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2i32_v1i64:
; SOFT:         @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v2i32_v1i64_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v2i32_v1i64:
; HARD:         @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vadd.i64 d0, d16, d16
; HARD-NEXT:    bl test_v2i32_v1i64_helper
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <1 x i64>, ptr %p
  %2 = add <1 x i64> %1, %1
  %3 = call <2 x i32> @test_v2i32_v1i64_helper(<1 x i64> %2)
  %4 = add <2 x i32> %3, %3
  store <2 x i32> %4, ptr %q
  ret void
}

declare <2 x i32> @test_v2i32_v2f32_helper(<2 x float> %p)
define void @test_v2i32_v2f32(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2i32_v2f32:
; SOFT:         @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v2i32_v2f32_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v2i32_v2f32:
; HARD:         @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bl test_v2i32_v2f32_helper
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <2 x float>, ptr %p
  %2 = fadd <2 x float> %1, %1
  %3 = call <2 x i32> @test_v2i32_v2f32_helper(<2 x float> %2)
  %4 = add <2 x i32> %3, %3
  store <2 x i32> %4, ptr %q
  ret void
}

declare <2 x i32> @test_v2i32_v4i16_helper(<4 x i16> %p)
define void @test_v2i32_v4i16(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2i32_v4i16:
; SOFT:         @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v2i32_v4i16_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v2i32_v4i16:
; HARD:         @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.16 d16, d16
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d0, d16
; HARD-NEXT:    bl test_v2i32_v4i16_helper
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <4 x i16>, ptr %p
  %2 = add <4 x i16> %1, %1
  %3 = call <2 x i32> @test_v2i32_v4i16_helper(<4 x i16> %2)
  %4 = add <2 x i32> %3, %3
  store <2 x i32> %4, ptr %q
  ret void
}

declare <2 x i32> @test_v2i32_v8i8_helper(<8 x i8> %p)
define void @test_v2i32_v8i8(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2i32_v8i8:
; SOFT:         @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v2i32_v8i8_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v2i32_v8i8:
; HARD:         @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.8 d16, d16
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d0, d16
; HARD-NEXT:    bl test_v2i32_v8i8_helper
; HARD-NEXT:    vrev64.32 d16, d0
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <8 x i8>, ptr %p
  %2 = add <8 x i8> %1, %1
  %3 = call <2 x i32> @test_v2i32_v8i8_helper(<8 x i8> %2)
  %4 = add <2 x i32> %3, %3
  store <2 x i32> %4, ptr %q
  ret void
}

declare <4 x i16> @test_v4i16_i64_helper(i64 %p)
define void @test_v4i16_i64(ptr %p, ptr %q) {
; SOFT-LABEL: test_v4i16_i64:
; SOFT:         @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    ldrd r0, r1, [r0]
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    bl test_v4i16_i64_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v4i16_i64:
; HARD:         @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    ldrd r0, r1, [r0]
; HARD-NEXT:    adds r1, r1, r1
;
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    bl test_v4i16_i64_helper
; HARD-NEXT:    vrev64.16 d16, d0
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d16, d16
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load i64, ptr %p
  %2 = add i64 %1, %1
  %3 = call <4 x i16> @test_v4i16_i64_helper(i64 %2)
  %4 = add <4 x i16> %3, %3
  store <4 x i16> %4, ptr %q
  ret void
}

; double argument -> <4 x i16> result across a call.
declare <4 x i16> @test_v4i16_f64_helper(double %p)
define void @test_v4i16_f64(ptr %p, ptr %q) {
; SOFT-LABEL: test_v4i16_f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v4i16_f64_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v4i16_f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bl test_v4i16_f64_helper
; HARD-NEXT:    vrev64.16 d16, d0
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d16, d16
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load double, ptr %p
  %2 = fadd double %1, %1
  %3 = call <4 x i16> @test_v4i16_f64_helper(double %2)
  %4 = add <4 x i16> %3, %3
  store <4 x i16> %4, ptr %q
  ret void
}

; <1 x i64> argument -> <4 x i16> result across a call.
declare <4 x i16> @test_v4i16_v1i64_helper(<1 x i64> %p)
define void @test_v4i16_v1i64(ptr %p, ptr %q) {
; SOFT-LABEL: test_v4i16_v1i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v4i16_v1i64_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v4i16_v1i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vadd.i64 d0, d16, d16
; HARD-NEXT:    bl test_v4i16_v1i64_helper
; HARD-NEXT:    vrev64.16 d16, d0
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d16, d16
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <1 x i64>, ptr %p
  %2 = add <1 x i64> %1, %1
  %3 = call <4 x i16> @test_v4i16_v1i64_helper(<1 x i64> %2)
  %4 = add <4 x i16> %3, %3
  store <4 x i16> %4, ptr %q
  ret void
}

; <2 x float> argument -> <4 x i16> result across a call.
declare <4 x i16> @test_v4i16_v2f32_helper(<2 x float> %p)
define void @test_v4i16_v2f32(ptr %p, ptr %q) {
; SOFT-LABEL: test_v4i16_v2f32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v4i16_v2f32_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v4i16_v2f32:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bl test_v4i16_v2f32_helper
; HARD-NEXT:    vrev64.16 d16, d0
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d16, d16
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <2 x float>, ptr %p
  %2 = fadd <2 x float> %1, %1
  %3 = call <4 x i16> @test_v4i16_v2f32_helper(<2 x float> %2)
  %4 = add <4 x i16> %3, %3
  store <4 x i16> %4, ptr %q
  ret void
}

; <2 x i32> argument -> <4 x i16> result across a call.
declare <4 x i16> @test_v4i16_v2i32_helper(<2 x i32> %p)
define void @test_v4i16_v2i32(ptr %p, ptr %q) {
; SOFT-LABEL: test_v4i16_v2i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v4i16_v2i32_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v4i16_v2i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bl test_v4i16_v2i32_helper
; HARD-NEXT:    vrev64.16 d16, d0
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d16, d16
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <2 x i32>, ptr %p
  %2 = add <2 x i32> %1, %1
  %3 = call <4 x i16> @test_v4i16_v2i32_helper(<2 x i32> %2)
  %4 = add <4 x i16> %3, %3
  store <4 x i16> %4, ptr %q
  ret void
}

; <8 x i8> argument -> <4 x i16> result across a call.
declare <4 x i16> @test_v4i16_v8i8_helper(<8 x i8> %p)
define void @test_v4i16_v8i8(ptr %p, ptr %q) {
; SOFT-LABEL: test_v4i16_v8i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v4i16_v8i8_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v4i16_v8i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.8 d16, d16
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d0, d16
; HARD-NEXT:    bl test_v4i16_v8i8_helper
; HARD-NEXT:    vrev64.16 d16, d0
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d16, d16
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <8 x i8>, ptr %p
  %2 = add <8 x i8> %1, %1
  %3 = call <4 x i16> @test_v4i16_v8i8_helper(<8 x i8> %2)
  %4 = add <4 x i16> %3, %3
  store <4 x i16> %4, ptr %q
  ret void
}

; i64 argument (split across r0:r1) -> <8 x i8> result across a call.
declare <8 x i8> @test_v8i8_i64_helper(i64 %p)
define void @test_v8i8_i64(ptr %p, ptr %q) {
; SOFT-LABEL: test_v8i8_i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    ldrd r0, r1, [r0]
; SOFT-NEXT:    adds r1, r1, r1
; SOFT-NEXT:    adc r0, r0, r0
; SOFT-NEXT:    bl test_v8i8_i64_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v8i8_i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    ldrd r0, r1, [r0]
; HARD-NEXT:    adds r1, r1, r1
; HARD-NEXT:    adc r0, r0, r0
; HARD-NEXT:    bl test_v8i8_i64_helper
; HARD-NEXT:    vrev64.8 d16, d0
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d16, d16
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load i64, ptr %p
  %2 = add i64 %1, %1
  %3 = call <8 x i8> @test_v8i8_i64_helper(i64 %2)
  %4 = add <8 x i8> %3, %3
  store <8 x i8> %4, ptr %q
  ret void
}

; double argument -> <8 x i8> result across a call.
declare <8 x i8> @test_v8i8_f64_helper(double %p)
define void @test_v8i8_f64(ptr %p, ptr %q) {
; SOFT-LABEL: test_v8i8_f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vadd.f64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v8i8_f64_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v8i8_f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bl test_v8i8_f64_helper
; HARD-NEXT:    vrev64.8 d16, d0
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d16, d16
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load double, ptr %p
  %2 = fadd double %1, %1
  %3 = call <8 x i8> @test_v8i8_f64_helper(double %2)
  %4 = add <8 x i8> %3, %3
  store <8 x i8> %4, ptr %q
  ret void
}

; <1 x i64> argument -> <8 x i8> result across a call.
declare <8 x i8> @test_v8i8_v1i64_helper(<1 x i64> %p)
define void @test_v8i8_v1i64(ptr %p, ptr %q) {
; SOFT-LABEL: test_v8i8_v1i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vadd.i64 d16, d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v8i8_v1i64_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v8i8_v1i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vadd.i64 d0, d16, d16
; HARD-NEXT:    bl test_v8i8_v1i64_helper
; HARD-NEXT:    vrev64.8 d16, d0
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d16, d16
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <1 x i64>, ptr %p
  %2 = add <1 x i64> %1, %1
  %3 = call <8 x i8> @test_v8i8_v1i64_helper(<1 x i64> %2)
  %4 = add <8 x i8> %3, %3
  store <8 x i8> %4, ptr %q
  ret void
}

; <2 x float> argument -> <8 x i8> result across a call.
declare <8 x i8> @test_v8i8_v2f32_helper(<2 x float> %p)
define void @test_v8i8_v2f32(ptr %p, ptr %q) {
; SOFT-LABEL: test_v8i8_v2f32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.f32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v8i8_v2f32_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v8i8_v2f32:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.f32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bl test_v8i8_v2f32_helper
; HARD-NEXT:    vrev64.8 d16, d0
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d16, d16
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <2 x float>, ptr %p
  %2 = fadd <2 x float> %1, %1
  %3 = call <8 x i8> @test_v8i8_v2f32_helper(<2 x float> %2)
  %4 = add <8 x i8> %3, %3
  store <8 x i8> %4, ptr %q
  ret void
}

; <2 x i32> argument -> <8 x i8> result across a call.
declare <8 x i8> @test_v8i8_v2i32_helper(<2 x i32> %p)
define void @test_v8i8_v2i32(ptr %p, ptr %q) {
; SOFT-LABEL: test_v8i8_v2i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vadd.i32 d16, d16, d16
; SOFT-NEXT:    vrev64.32 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v8i8_v2i32_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v8i8_v2i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.32 d16, d16
; HARD-NEXT:    vadd.i32 d16, d16, d16
; HARD-NEXT:    vrev64.32 d0, d16
; HARD-NEXT:    bl test_v8i8_v2i32_helper
; HARD-NEXT:    vrev64.8 d16, d0
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d16, d16
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <2 x i32>, ptr %p
  %2 = add <2 x i32> %1, %1
  %3 = call <8 x i8> @test_v8i8_v2i32_helper(<2 x i32> %2)
  %4 = add <8 x i8> %3, %3
  store <8 x i8> %4, ptr %q
  ret void
}

; <4 x i16> argument -> <8 x i8> result across a call.
declare <8 x i8> @test_v8i8_v4i16_helper(<4 x i16> %p)
define void @test_v8i8_v4i16(ptr %p, ptr %q) {
; SOFT-LABEL: test_v8i8_v4i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vldr d16, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vadd.i16 d16, d16, d16
; SOFT-NEXT:    vrev64.16 d16, d16
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    bl test_v8i8_v4i16_helper
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vadd.i8 d16, d16, d16
; SOFT-NEXT:    vrev64.8 d16, d16
; SOFT-NEXT:    vstr d16, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v8i8_v4i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vldr d16, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.16 d16, d16
; HARD-NEXT:    vadd.i16 d16, d16, d16
; HARD-NEXT:    vrev64.16 d0, d16
; HARD-NEXT:    bl test_v8i8_v4i16_helper
; HARD-NEXT:    vrev64.8 d16, d0
; HARD-NEXT:    vadd.i8 d16, d16, d16
; HARD-NEXT:    vrev64.8 d16, d16
; HARD-NEXT:    vstr d16, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <4 x i16>, ptr %p
  %2 = add <4 x i16> %1, %1
  %3 = call <8 x i8> @test_v8i8_v4i16_helper(<4 x i16> %2)
  %4 = add <8 x i8> %3, %3
  store <8 x i8> %4, ptr %q
  ret void
}

; <2 x double> argument -> fp128 result; the fp128 add is libcall __addtf3.
declare fp128 @test_f128_v2f64_helper(<2 x double> %p)
define void @test_f128_v2f64(ptr %p, ptr %q) {
; SOFT-LABEL: test_f128_v2f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vadd.f64 d18, d16, d16
; SOFT-NEXT:    vadd.f64 d16, d17, d17
; SOFT-NEXT:    vmov r1, r0, d18
; SOFT-NEXT:    vmov r3, r2, d16
; SOFT-NEXT:    bl test_f128_v2f64_helper
; SOFT-NEXT:    stm sp, {r0, r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    stm r4, {r0, r1, r2, r3}
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_f128_v2f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vadd.f64 d1, d17, d17
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bl test_f128_v2f64_helper
; HARD-NEXT:    stm sp, {r0, r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    stm r4, {r0, r1, r2, r3}
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r4, pc}
  %1 = load <2 x double>, ptr %p
  %2 = fadd <2 x double> %1, %1
  %3 = call fp128 @test_f128_v2f64_helper(<2 x double> %2)
  %4 = fadd fp128 %3, %3
  store fp128 %4, ptr %q
  ret void
}

; <2 x i64> argument -> fp128 result; the fp128 add is libcall __addtf3.
declare fp128 @test_f128_v2i64_helper(<2 x i64> %p)
define void @test_f128_v2i64(ptr %p, ptr %q) {
; SOFT-LABEL: test_f128_v2i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vadd.i64 q8, q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bl test_f128_v2i64_helper
; SOFT-NEXT:    stm sp, {r0, r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    stm r4, {r0, r1, r2, r3}
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_f128_v2i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vadd.i64 q0, q8, q8
; HARD-NEXT:    bl test_f128_v2i64_helper
; HARD-NEXT:    stm sp, {r0, r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    stm r4, {r0, r1, r2, r3}
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r4, pc}
  %1 = load <2 x i64>, ptr %p
  %2 = add <2 x i64> %1, %1
@test_f128_v2i64_helper(<2 x i64> %2) 1762 %4 = fadd fp128 %3, %3 1763 store fp128 %4, ptr %q 1764 ret void 1765} 1766 1767declare fp128 @test_f128_v4f32_helper(<4 x float> %p) 1768define void @test_f128_v4f32(ptr %p, ptr %q) { 1769; SOFT-LABEL: test_f128_v4f32: 1770; SOFT: @ %bb.0: 1771; SOFT-NEXT: .save {r4, lr} 1772; SOFT-NEXT: push {r4, lr} 1773; SOFT-NEXT: .pad #16 1774; SOFT-NEXT: sub sp, sp, #16 1775; SOFT-NEXT: vld1.64 {d16, d17}, [r0] 1776; SOFT-NEXT: mov r4, r1 1777; SOFT-NEXT: vrev64.32 q8, q8 1778; SOFT-NEXT: vadd.f32 q8, q8, q8 1779; SOFT-NEXT: vrev64.32 q8, q8 1780; SOFT-NEXT: vmov r1, r0, d16 1781; SOFT-NEXT: vmov r3, r2, d17 1782; SOFT-NEXT: bl test_f128_v4f32_helper 1783; SOFT-NEXT: stm sp, {r0, r1, r2, r3} 1784; SOFT-NEXT: bl __addtf3 1785; SOFT-NEXT: stm r4, {r0, r1, r2, r3} 1786; SOFT-NEXT: add sp, sp, #16 1787; SOFT-NEXT: pop {r4, pc} 1788; 1789; HARD-LABEL: test_f128_v4f32: 1790; HARD: @ %bb.0: 1791; HARD-NEXT: .save {r4, lr} 1792; HARD-NEXT: push {r4, lr} 1793; HARD-NEXT: .pad #16 1794; HARD-NEXT: sub sp, sp, #16 1795; HARD-NEXT: vld1.64 {d16, d17}, [r0] 1796; HARD-NEXT: mov r4, r1 1797; HARD-NEXT: vrev64.32 q8, q8 1798; HARD-NEXT: vadd.f32 q8, q8, q8 1799; HARD-NEXT: vrev64.32 q0, q8 1800; HARD-NEXT: bl test_f128_v4f32_helper 1801; HARD-NEXT: stm sp, {r0, r1, r2, r3} 1802; HARD-NEXT: bl __addtf3 1803; HARD-NEXT: stm r4, {r0, r1, r2, r3} 1804; HARD-NEXT: add sp, sp, #16 1805; HARD-NEXT: pop {r4, pc} 1806 %1 = load <4 x float>, ptr %p 1807 %2 = fadd <4 x float> %1, %1 1808 %3 = call fp128 @test_f128_v4f32_helper(<4 x float> %2) 1809 %4 = fadd fp128 %3, %3 1810 store fp128 %4, ptr %q 1811 ret void 1812} 1813 1814declare fp128 @test_f128_v4i32_helper(<4 x i32> %p) 1815define void @test_f128_v4i32(ptr %p, ptr %q) { 1816; SOFT-LABEL: test_f128_v4i32: 1817; SOFT: @ %bb.0: 1818; SOFT-NEXT: .save {r4, lr} 1819; SOFT-NEXT: push {r4, lr} 1820; SOFT-NEXT: .pad #16 1821; SOFT-NEXT: sub sp, sp, #16 1822; SOFT-NEXT: vld1.64 {d16, d17}, [r0] 1823; 
SOFT-NEXT: mov r4, r1 1824; SOFT-NEXT: vrev64.32 q8, q8 1825; SOFT-NEXT: vadd.i32 q8, q8, q8 1826; SOFT-NEXT: vrev64.32 q8, q8 1827; SOFT-NEXT: vmov r1, r0, d16 1828; SOFT-NEXT: vmov r3, r2, d17 1829; SOFT-NEXT: bl test_f128_v4i32_helper 1830; SOFT-NEXT: stm sp, {r0, r1, r2, r3} 1831; SOFT-NEXT: bl __addtf3 1832; SOFT-NEXT: stm r4, {r0, r1, r2, r3} 1833; SOFT-NEXT: add sp, sp, #16 1834; SOFT-NEXT: pop {r4, pc} 1835; 1836; HARD-LABEL: test_f128_v4i32: 1837; HARD: @ %bb.0: 1838; HARD-NEXT: .save {r4, lr} 1839; HARD-NEXT: push {r4, lr} 1840; HARD-NEXT: .pad #16 1841; HARD-NEXT: sub sp, sp, #16 1842; HARD-NEXT: vld1.64 {d16, d17}, [r0] 1843; HARD-NEXT: mov r4, r1 1844; HARD-NEXT: vrev64.32 q8, q8 1845; HARD-NEXT: vadd.i32 q8, q8, q8 1846; HARD-NEXT: vrev64.32 q0, q8 1847; HARD-NEXT: bl test_f128_v4i32_helper 1848; HARD-NEXT: stm sp, {r0, r1, r2, r3} 1849; HARD-NEXT: bl __addtf3 1850; HARD-NEXT: stm r4, {r0, r1, r2, r3} 1851; HARD-NEXT: add sp, sp, #16 1852; HARD-NEXT: pop {r4, pc} 1853 %1 = load <4 x i32>, ptr %p 1854 %2 = add <4 x i32> %1, %1 1855 %3 = call fp128 @test_f128_v4i32_helper(<4 x i32> %2) 1856 %4 = fadd fp128 %3, %3 1857 store fp128 %4, ptr %q 1858 ret void 1859} 1860 1861declare fp128 @test_f128_v8i16_helper(<8 x i16> %p) 1862define void @test_f128_v8i16(ptr %p, ptr %q) { 1863; SOFT-LABEL: test_f128_v8i16: 1864; SOFT: @ %bb.0: 1865; SOFT-NEXT: .save {r4, lr} 1866; SOFT-NEXT: push {r4, lr} 1867; SOFT-NEXT: .pad #16 1868; SOFT-NEXT: sub sp, sp, #16 1869; SOFT-NEXT: vld1.64 {d16, d17}, [r0] 1870; SOFT-NEXT: mov r4, r1 1871; SOFT-NEXT: vrev64.16 q8, q8 1872; SOFT-NEXT: vadd.i16 q8, q8, q8 1873; SOFT-NEXT: vrev64.16 q8, q8 1874; SOFT-NEXT: vmov r1, r0, d16 1875; SOFT-NEXT: vmov r3, r2, d17 1876; SOFT-NEXT: bl test_f128_v8i16_helper 1877; SOFT-NEXT: stm sp, {r0, r1, r2, r3} 1878; SOFT-NEXT: bl __addtf3 1879; SOFT-NEXT: stm r4, {r0, r1, r2, r3} 1880; SOFT-NEXT: add sp, sp, #16 1881; SOFT-NEXT: pop {r4, pc} 1882; 1883; HARD-LABEL: test_f128_v8i16: 1884; HARD: @ 
%bb.0: 1885; HARD-NEXT: .save {r4, lr} 1886; HARD-NEXT: push {r4, lr} 1887; HARD-NEXT: .pad #16 1888; HARD-NEXT: sub sp, sp, #16 1889; HARD-NEXT: vld1.64 {d16, d17}, [r0] 1890; HARD-NEXT: mov r4, r1 1891; HARD-NEXT: vrev64.16 q8, q8 1892; HARD-NEXT: vadd.i16 q8, q8, q8 1893; HARD-NEXT: vrev64.16 q0, q8 1894; HARD-NEXT: bl test_f128_v8i16_helper 1895; HARD-NEXT: stm sp, {r0, r1, r2, r3} 1896; HARD-NEXT: bl __addtf3 1897; HARD-NEXT: stm r4, {r0, r1, r2, r3} 1898; HARD-NEXT: add sp, sp, #16 1899; HARD-NEXT: pop {r4, pc} 1900 %1 = load <8 x i16>, ptr %p 1901 %2 = add <8 x i16> %1, %1 1902 %3 = call fp128 @test_f128_v8i16_helper(<8 x i16> %2) 1903 %4 = fadd fp128 %3, %3 1904 store fp128 %4, ptr %q 1905 ret void 1906} 1907 1908declare fp128 @test_f128_v16i8_helper(<16 x i8> %p) 1909define void @test_f128_v16i8(ptr %p, ptr %q) { 1910; SOFT-LABEL: test_f128_v16i8: 1911; SOFT: @ %bb.0: 1912; SOFT-NEXT: .save {r4, lr} 1913; SOFT-NEXT: push {r4, lr} 1914; SOFT-NEXT: .pad #16 1915; SOFT-NEXT: sub sp, sp, #16 1916; SOFT-NEXT: vld1.64 {d16, d17}, [r0] 1917; SOFT-NEXT: mov r4, r1 1918; SOFT-NEXT: vrev64.8 q8, q8 1919; SOFT-NEXT: vadd.i8 q8, q8, q8 1920; SOFT-NEXT: vrev64.8 q8, q8 1921; SOFT-NEXT: vmov r1, r0, d16 1922; SOFT-NEXT: vmov r3, r2, d17 1923; SOFT-NEXT: bl test_f128_v16i8_helper 1924; SOFT-NEXT: stm sp, {r0, r1, r2, r3} 1925; SOFT-NEXT: bl __addtf3 1926; SOFT-NEXT: stm r4, {r0, r1, r2, r3} 1927; SOFT-NEXT: add sp, sp, #16 1928; SOFT-NEXT: pop {r4, pc} 1929; 1930; HARD-LABEL: test_f128_v16i8: 1931; HARD: @ %bb.0: 1932; HARD-NEXT: .save {r4, lr} 1933; HARD-NEXT: push {r4, lr} 1934; HARD-NEXT: .pad #16 1935; HARD-NEXT: sub sp, sp, #16 1936; HARD-NEXT: vld1.64 {d16, d17}, [r0] 1937; HARD-NEXT: mov r4, r1 1938; HARD-NEXT: vrev64.8 q8, q8 1939; HARD-NEXT: vadd.i8 q8, q8, q8 1940; HARD-NEXT: vrev64.8 q0, q8 1941; HARD-NEXT: bl test_f128_v16i8_helper 1942; HARD-NEXT: stm sp, {r0, r1, r2, r3} 1943; HARD-NEXT: bl __addtf3 1944; HARD-NEXT: stm r4, {r0, r1, r2, r3} 1945; HARD-NEXT: 
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r4, pc}
  %1 = load <16 x i8>, ptr %p
  %2 = add <16 x i8> %1, %1
  %3 = call fp128 @test_f128_v16i8_helper(<16 x i8> %2)
  %4 = fadd fp128 %3, %3
  store fp128 %4, ptr %q
  ret void
}

; fp128 argument (in r0-r3 and on the stack copy) -> <2 x double> result; fp128 add via __addtf3.
declare <2 x double> @test_v2f64_f128_helper(fp128 %p)
define void @test_v2f64_f128(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2f64_f128:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, r5, r11, lr}
; SOFT-NEXT:    push {r4, r5, r11, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    ldr r4, [r0]
; SOFT-NEXT:    mov r5, r1
; SOFT-NEXT:    ldmib r0, {r1, r2, r3}
; SOFT-NEXT:    mov r0, r4
; SOFT-NEXT:    str r4, [sp]
; SOFT-NEXT:    stmib sp, {r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    bl test_v2f64_f128_helper
; SOFT-NEXT:    vmov d16, r3, r2
; SOFT-NEXT:    vmov d17, r1, r0
; SOFT-NEXT:    vadd.f64 d19, d16, d16
; SOFT-NEXT:    vadd.f64 d18, d17, d17
; SOFT-NEXT:    vst1.64 {d18, d19}, [r5]
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r4, r5, r11, pc}
;
; HARD-LABEL: test_v2f64_f128:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, r5, r11, lr}
; HARD-NEXT:    push {r4, r5, r11, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    ldr r4, [r0]
; HARD-NEXT:    mov r5, r1
; HARD-NEXT:    ldmib r0, {r1, r2, r3}
; HARD-NEXT:    mov r0, r4
; HARD-NEXT:    str r4, [sp]
; HARD-NEXT:    stmib sp, {r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    bl test_v2f64_f128_helper
; HARD-NEXT:    vadd.f64 d17, d1, d1
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vst1.64 {d16, d17}, [r5]
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r4, r5, r11, pc}
  %1 = load fp128, ptr %p
  %2 = fadd fp128 %1, %1
  %3 = call <2 x double> @test_v2f64_f128_helper(fp128 %2)
  %4 = fadd <2 x double> %3, %3
  store <2 x double> %4, ptr %q
  ret void
}

; <2 x i64> argument -> <2 x double> result; HARD passes/returns the q-register in q0/d0-d1.
declare <2 x double> @test_v2f64_v2i64_helper(<2 x i64> %p)
define void @test_v2f64_v2i64(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2f64_v2i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vadd.i64 q8, q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bl test_v2f64_v2i64_helper
; SOFT-NEXT:    vmov d16, r3, r2
; SOFT-NEXT:    vmov d17, r1, r0
; SOFT-NEXT:    vadd.f64 d19, d16, d16
; SOFT-NEXT:    vadd.f64 d18, d17, d17
; SOFT-NEXT:    vst1.64 {d18, d19}, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v2f64_v2i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vadd.i64 q0, q8, q8
; HARD-NEXT:    bl test_v2f64_v2i64_helper
; HARD-NEXT:    vadd.f64 d17, d1, d1
; HARD-NEXT:    vadd.f64 d16, d0, d0
; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <2 x i64>, ptr %p
  %2 = add <2 x i64> %1, %1
  %3 = call <2 x double> @test_v2f64_v2i64_helper(<2 x i64> %2)
  %4 = fadd <2 x double> %3, %3
  store <2 x double> %4, ptr %q
  ret void
}

; <4 x float> argument -> <2 x double> result across a call.
declare <2 x double> @test_v2f64_v4f32_helper(<4 x float> %p)
define void @test_v2f64_v4f32(ptr %p, ptr %q) {
; SOFT-LABEL: test_v2f64_v4f32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bl test_v2f64_v4f32_helper
; SOFT-NEXT:    vmov d16, r3, r2
; SOFT-NEXT:    vmov d17, r1, r0
; SOFT-NEXT:    vadd.f64 d19, d16, d16
; SOFT-NEXT:    vadd.f64 d18, d17, d17
; SOFT-NEXT:    vst1.64
{d18, d19}, [r4] 2065; SOFT-NEXT: pop {r4, pc} 2066; 2067; HARD-LABEL: test_v2f64_v4f32: 2068; HARD: @ %bb.0: 2069; HARD-NEXT: .save {r4, lr} 2070; HARD-NEXT: push {r4, lr} 2071; HARD-NEXT: vld1.64 {d16, d17}, [r0] 2072; HARD-NEXT: mov r4, r1 2073; HARD-NEXT: vrev64.32 q8, q8 2074; HARD-NEXT: vadd.f32 q8, q8, q8 2075; HARD-NEXT: vrev64.32 q0, q8 2076; HARD-NEXT: bl test_v2f64_v4f32_helper 2077; HARD-NEXT: vadd.f64 d17, d1, d1 2078; HARD-NEXT: vadd.f64 d16, d0, d0 2079; HARD-NEXT: vst1.64 {d16, d17}, [r4] 2080; HARD-NEXT: pop {r4, pc} 2081 %1 = load <4 x float>, ptr %p 2082 %2 = fadd <4 x float> %1, %1 2083 %3 = call <2 x double> @test_v2f64_v4f32_helper(<4 x float> %2) 2084 %4 = fadd <2 x double> %3, %3 2085 store <2 x double> %4, ptr %q 2086 ret void 2087} 2088 2089declare <2 x double> @test_v2f64_v4i32_helper(<4 x i32> %p) 2090define void @test_v2f64_v4i32(ptr %p, ptr %q) { 2091; SOFT-LABEL: test_v2f64_v4i32: 2092; SOFT: @ %bb.0: 2093; SOFT-NEXT: .save {r4, lr} 2094; SOFT-NEXT: push {r4, lr} 2095; SOFT-NEXT: vld1.64 {d16, d17}, [r0] 2096; SOFT-NEXT: mov r4, r1 2097; SOFT-NEXT: vrev64.32 q8, q8 2098; SOFT-NEXT: vadd.i32 q8, q8, q8 2099; SOFT-NEXT: vrev64.32 q8, q8 2100; SOFT-NEXT: vmov r1, r0, d16 2101; SOFT-NEXT: vmov r3, r2, d17 2102; SOFT-NEXT: bl test_v2f64_v4i32_helper 2103; SOFT-NEXT: vmov d16, r3, r2 2104; SOFT-NEXT: vmov d17, r1, r0 2105; SOFT-NEXT: vadd.f64 d19, d16, d16 2106; SOFT-NEXT: vadd.f64 d18, d17, d17 2107; SOFT-NEXT: vst1.64 {d18, d19}, [r4] 2108; SOFT-NEXT: pop {r4, pc} 2109; 2110; HARD-LABEL: test_v2f64_v4i32: 2111; HARD: @ %bb.0: 2112; HARD-NEXT: .save {r4, lr} 2113; HARD-NEXT: push {r4, lr} 2114; HARD-NEXT: vld1.64 {d16, d17}, [r0] 2115; HARD-NEXT: mov r4, r1 2116; HARD-NEXT: vrev64.32 q8, q8 2117; HARD-NEXT: vadd.i32 q8, q8, q8 2118; HARD-NEXT: vrev64.32 q0, q8 2119; HARD-NEXT: bl test_v2f64_v4i32_helper 2120; HARD-NEXT: vadd.f64 d17, d1, d1 2121; HARD-NEXT: vadd.f64 d16, d0, d0 2122; HARD-NEXT: vst1.64 {d16, d17}, [r4] 2123; HARD-NEXT: pop 
{r4, pc} 2124 %1 = load <4 x i32>, ptr %p 2125 %2 = add <4 x i32> %1, %1 2126 %3 = call <2 x double> @test_v2f64_v4i32_helper(<4 x i32> %2) 2127 %4 = fadd <2 x double> %3, %3 2128 store <2 x double> %4, ptr %q 2129 ret void 2130} 2131 2132declare <2 x double> @test_v2f64_v8i16_helper(<8 x i16> %p) 2133define void @test_v2f64_v8i16(ptr %p, ptr %q) { 2134; SOFT-LABEL: test_v2f64_v8i16: 2135; SOFT: @ %bb.0: 2136; SOFT-NEXT: .save {r4, lr} 2137; SOFT-NEXT: push {r4, lr} 2138; SOFT-NEXT: vld1.64 {d16, d17}, [r0] 2139; SOFT-NEXT: mov r4, r1 2140; SOFT-NEXT: vrev64.16 q8, q8 2141; SOFT-NEXT: vadd.i16 q8, q8, q8 2142; SOFT-NEXT: vrev64.16 q8, q8 2143; SOFT-NEXT: vmov r1, r0, d16 2144; SOFT-NEXT: vmov r3, r2, d17 2145; SOFT-NEXT: bl test_v2f64_v8i16_helper 2146; SOFT-NEXT: vmov d16, r3, r2 2147; SOFT-NEXT: vmov d17, r1, r0 2148; SOFT-NEXT: vadd.f64 d19, d16, d16 2149; SOFT-NEXT: vadd.f64 d18, d17, d17 2150; SOFT-NEXT: vst1.64 {d18, d19}, [r4] 2151; SOFT-NEXT: pop {r4, pc} 2152; 2153; HARD-LABEL: test_v2f64_v8i16: 2154; HARD: @ %bb.0: 2155; HARD-NEXT: .save {r4, lr} 2156; HARD-NEXT: push {r4, lr} 2157; HARD-NEXT: vld1.64 {d16, d17}, [r0] 2158; HARD-NEXT: mov r4, r1 2159; HARD-NEXT: vrev64.16 q8, q8 2160; HARD-NEXT: vadd.i16 q8, q8, q8 2161; HARD-NEXT: vrev64.16 q0, q8 2162; HARD-NEXT: bl test_v2f64_v8i16_helper 2163; HARD-NEXT: vadd.f64 d17, d1, d1 2164; HARD-NEXT: vadd.f64 d16, d0, d0 2165; HARD-NEXT: vst1.64 {d16, d17}, [r4] 2166; HARD-NEXT: pop {r4, pc} 2167 %1 = load <8 x i16>, ptr %p 2168 %2 = add <8 x i16> %1, %1 2169 %3 = call <2 x double> @test_v2f64_v8i16_helper(<8 x i16> %2) 2170 %4 = fadd <2 x double> %3, %3 2171 store <2 x double> %4, ptr %q 2172 ret void 2173} 2174 2175declare <2 x double> @test_v2f64_v16i8_helper(<16 x i8> %p) 2176define void @test_v2f64_v16i8(ptr %p, ptr %q) { 2177; SOFT-LABEL: test_v2f64_v16i8: 2178; SOFT: @ %bb.0: 2179; SOFT-NEXT: .save {r4, lr} 2180; SOFT-NEXT: push {r4, lr} 2181; SOFT-NEXT: vld1.64 {d16, d17}, [r0] 2182; SOFT-NEXT: mov r4, 
r1 2183; SOFT-NEXT: vrev64.8 q8, q8 2184; SOFT-NEXT: vadd.i8 q8, q8, q8 2185; SOFT-NEXT: vrev64.8 q8, q8 2186; SOFT-NEXT: vmov r1, r0, d16 2187; SOFT-NEXT: vmov r3, r2, d17 2188; SOFT-NEXT: bl test_v2f64_v16i8_helper 2189; SOFT-NEXT: vmov d16, r3, r2 2190; SOFT-NEXT: vmov d17, r1, r0 2191; SOFT-NEXT: vadd.f64 d19, d16, d16 2192; SOFT-NEXT: vadd.f64 d18, d17, d17 2193; SOFT-NEXT: vst1.64 {d18, d19}, [r4] 2194; SOFT-NEXT: pop {r4, pc} 2195; 2196; HARD-LABEL: test_v2f64_v16i8: 2197; HARD: @ %bb.0: 2198; HARD-NEXT: .save {r4, lr} 2199; HARD-NEXT: push {r4, lr} 2200; HARD-NEXT: vld1.64 {d16, d17}, [r0] 2201; HARD-NEXT: mov r4, r1 2202; HARD-NEXT: vrev64.8 q8, q8 2203; HARD-NEXT: vadd.i8 q8, q8, q8 2204; HARD-NEXT: vrev64.8 q0, q8 2205; HARD-NEXT: bl test_v2f64_v16i8_helper 2206; HARD-NEXT: vadd.f64 d17, d1, d1 2207; HARD-NEXT: vadd.f64 d16, d0, d0 2208; HARD-NEXT: vst1.64 {d16, d17}, [r4] 2209; HARD-NEXT: pop {r4, pc} 2210 %1 = load <16 x i8>, ptr %p 2211 %2 = add <16 x i8> %1, %1 2212 %3 = call <2 x double> @test_v2f64_v16i8_helper(<16 x i8> %2) 2213 %4 = fadd <2 x double> %3, %3 2214 store <2 x double> %4, ptr %q 2215 ret void 2216} 2217 2218declare <2 x i64> @test_v2i64_f128_helper(fp128 %p) 2219define void @test_v2i64_f128(ptr %p, ptr %q) { 2220; SOFT-LABEL: test_v2i64_f128: 2221; SOFT: @ %bb.0: 2222; SOFT-NEXT: .save {r4, r5, r11, lr} 2223; SOFT-NEXT: push {r4, r5, r11, lr} 2224; SOFT-NEXT: .pad #16 2225; SOFT-NEXT: sub sp, sp, #16 2226; SOFT-NEXT: ldr r4, [r0] 2227; SOFT-NEXT: mov r5, r1 2228; SOFT-NEXT: ldmib r0, {r1, r2, r3} 2229; SOFT-NEXT: mov r0, r4 2230; SOFT-NEXT: str r4, [sp] 2231; SOFT-NEXT: stmib sp, {r1, r2, r3} 2232; SOFT-NEXT: bl __addtf3 2233; SOFT-NEXT: bl test_v2i64_f128_helper 2234; SOFT-NEXT: vmov d17, r3, r2 2235; SOFT-NEXT: vmov d16, r1, r0 2236; SOFT-NEXT: vadd.i64 q8, q8, q8 2237; SOFT-NEXT: vst1.64 {d16, d17}, [r5] 2238; SOFT-NEXT: add sp, sp, #16 2239; SOFT-NEXT: pop {r4, r5, r11, pc} 2240; 2241; HARD-LABEL: test_v2i64_f128: 2242; HARD: @ 
%bb.0: 2243; HARD-NEXT: .save {r4, r5, r11, lr} 2244; HARD-NEXT: push {r4, r5, r11, lr} 2245; HARD-NEXT: .pad #16 2246; HARD-NEXT: sub sp, sp, #16 2247; HARD-NEXT: ldr r4, [r0] 2248; HARD-NEXT: mov r5, r1 2249; HARD-NEXT: ldmib r0, {r1, r2, r3} 2250; HARD-NEXT: mov r0, r4 2251; HARD-NEXT: str r4, [sp] 2252; HARD-NEXT: stmib sp, {r1, r2, r3} 2253; HARD-NEXT: bl __addtf3 2254; HARD-NEXT: bl test_v2i64_f128_helper 2255; HARD-NEXT: vadd.i64 q8, q0, q0 2256; HARD-NEXT: vst1.64 {d16, d17}, [r5] 2257; HARD-NEXT: add sp, sp, #16 2258; HARD-NEXT: pop {r4, r5, r11, pc} 2259 %1 = load fp128, ptr %p 2260 %2 = fadd fp128 %1, %1 2261 %3 = call <2 x i64> @test_v2i64_f128_helper(fp128 %2) 2262 %4 = add <2 x i64> %3, %3 2263 store <2 x i64> %4, ptr %q 2264 ret void 2265} 2266 2267declare <2 x i64> @test_v2i64_v2f64_helper(<2 x double> %p) 2268define void @test_v2i64_v2f64(ptr %p, ptr %q) { 2269; SOFT-LABEL: test_v2i64_v2f64: 2270; SOFT: @ %bb.0: 2271; SOFT-NEXT: .save {r4, lr} 2272; SOFT-NEXT: push {r4, lr} 2273; SOFT-NEXT: vld1.64 {d16, d17}, [r0] 2274; SOFT-NEXT: mov r4, r1 2275; SOFT-NEXT: vadd.f64 d18, d16, d16 2276; SOFT-NEXT: vadd.f64 d16, d17, d17 2277; SOFT-NEXT: vmov r1, r0, d18 2278; SOFT-NEXT: vmov r3, r2, d16 2279; SOFT-NEXT: bl test_v2i64_v2f64_helper 2280; SOFT-NEXT: vmov d17, r3, r2 2281; SOFT-NEXT: vmov d16, r1, r0 2282; SOFT-NEXT: vadd.i64 q8, q8, q8 2283; SOFT-NEXT: vst1.64 {d16, d17}, [r4] 2284; SOFT-NEXT: pop {r4, pc} 2285; 2286; HARD-LABEL: test_v2i64_v2f64: 2287; HARD: @ %bb.0: 2288; HARD-NEXT: .save {r4, lr} 2289; HARD-NEXT: push {r4, lr} 2290; HARD-NEXT: vld1.64 {d16, d17}, [r0] 2291; HARD-NEXT: mov r4, r1 2292; HARD-NEXT: vadd.f64 d1, d17, d17 2293; HARD-NEXT: vadd.f64 d0, d16, d16 2294; HARD-NEXT: bl test_v2i64_v2f64_helper 2295; HARD-NEXT: vadd.i64 q8, q0, q0 2296; HARD-NEXT: vst1.64 {d16, d17}, [r4] 2297; HARD-NEXT: pop {r4, pc} 2298 %1 = load <2 x double>, ptr %p 2299 %2 = fadd <2 x double> %1, %1 2300 %3 = call <2 x i64> @test_v2i64_v2f64_helper(<2 x 
double> %2) 2301 %4 = add <2 x i64> %3, %3 2302 store <2 x i64> %4, ptr %q 2303 ret void 2304} 2305 2306declare <2 x i64> @test_v2i64_v4f32_helper(<4 x float> %p) 2307define void @test_v2i64_v4f32(ptr %p, ptr %q) { 2308; SOFT-LABEL: test_v2i64_v4f32: 2309; SOFT: @ %bb.0: 2310; SOFT-NEXT: .save {r4, lr} 2311; SOFT-NEXT: push {r4, lr} 2312; SOFT-NEXT: vld1.64 {d16, d17}, [r0] 2313; SOFT-NEXT: mov r4, r1 2314; SOFT-NEXT: vrev64.32 q8, q8 2315; SOFT-NEXT: vadd.f32 q8, q8, q8 2316; SOFT-NEXT: vrev64.32 q8, q8 2317; SOFT-NEXT: vmov r1, r0, d16 2318; SOFT-NEXT: vmov r3, r2, d17 2319; SOFT-NEXT: bl test_v2i64_v4f32_helper 2320; SOFT-NEXT: vmov d17, r3, r2 2321; SOFT-NEXT: vmov d16, r1, r0 2322; SOFT-NEXT: vadd.i64 q8, q8, q8 2323; SOFT-NEXT: vst1.64 {d16, d17}, [r4] 2324; SOFT-NEXT: pop {r4, pc} 2325; 2326; HARD-LABEL: test_v2i64_v4f32: 2327; HARD: @ %bb.0: 2328; HARD-NEXT: .save {r4, lr} 2329; HARD-NEXT: push {r4, lr} 2330; HARD-NEXT: vld1.64 {d16, d17}, [r0] 2331; HARD-NEXT: mov r4, r1 2332; HARD-NEXT: vrev64.32 q8, q8 2333; HARD-NEXT: vadd.f32 q8, q8, q8 2334; HARD-NEXT: vrev64.32 q0, q8 2335; HARD-NEXT: bl test_v2i64_v4f32_helper 2336; HARD-NEXT: vadd.i64 q8, q0, q0 2337; HARD-NEXT: vst1.64 {d16, d17}, [r4] 2338; HARD-NEXT: pop {r4, pc} 2339 %1 = load <4 x float>, ptr %p 2340 %2 = fadd <4 x float> %1, %1 2341 %3 = call <2 x i64> @test_v2i64_v4f32_helper(<4 x float> %2) 2342 %4 = add <2 x i64> %3, %3 2343 store <2 x i64> %4, ptr %q 2344 ret void 2345} 2346 2347declare <2 x i64> @test_v2i64_v4i32_helper(<4 x i32> %p) 2348define void @test_v2i64_v4i32(ptr %p, ptr %q) { 2349; SOFT-LABEL: test_v2i64_v4i32: 2350; SOFT: @ %bb.0: 2351; SOFT-NEXT: .save {r4, lr} 2352; SOFT-NEXT: push {r4, lr} 2353; SOFT-NEXT: vld1.64 {d16, d17}, [r0] 2354; SOFT-NEXT: mov r4, r1 2355; SOFT-NEXT: vrev64.32 q8, q8 2356; SOFT-NEXT: vadd.i32 q8, q8, q8 2357; SOFT-NEXT: vrev64.32 q8, q8 2358; SOFT-NEXT: vmov r1, r0, d16 2359; SOFT-NEXT: vmov r3, r2, d17 2360; SOFT-NEXT: bl test_v2i64_v4i32_helper 
2361; SOFT-NEXT: vmov d17, r3, r2 2362; SOFT-NEXT: vmov d16, r1, r0 2363; SOFT-NEXT: vadd.i64 q8, q8, q8 2364; SOFT-NEXT: vst1.64 {d16, d17}, [r4] 2365; SOFT-NEXT: pop {r4, pc} 2366; 2367; HARD-LABEL: test_v2i64_v4i32: 2368; HARD: @ %bb.0: 2369; HARD-NEXT: .save {r4, lr} 2370; HARD-NEXT: push {r4, lr} 2371; HARD-NEXT: vld1.64 {d16, d17}, [r0] 2372; HARD-NEXT: mov r4, r1 2373; HARD-NEXT: vrev64.32 q8, q8 2374; HARD-NEXT: vadd.i32 q8, q8, q8 2375; HARD-NEXT: vrev64.32 q0, q8 2376; HARD-NEXT: bl test_v2i64_v4i32_helper 2377; HARD-NEXT: vadd.i64 q8, q0, q0 2378; HARD-NEXT: vst1.64 {d16, d17}, [r4] 2379; HARD-NEXT: pop {r4, pc} 2380 %1 = load <4 x i32>, ptr %p 2381 %2 = add <4 x i32> %1, %1 2382 %3 = call <2 x i64> @test_v2i64_v4i32_helper(<4 x i32> %2) 2383 %4 = add <2 x i64> %3, %3 2384 store <2 x i64> %4, ptr %q 2385 ret void 2386} 2387 2388declare <2 x i64> @test_v2i64_v8i16_helper(<8 x i16> %p) 2389define void @test_v2i64_v8i16(ptr %p, ptr %q) { 2390; SOFT-LABEL: test_v2i64_v8i16: 2391; SOFT: @ %bb.0: 2392; SOFT-NEXT: .save {r4, lr} 2393; SOFT-NEXT: push {r4, lr} 2394; SOFT-NEXT: vld1.64 {d16, d17}, [r0] 2395; SOFT-NEXT: mov r4, r1 2396; SOFT-NEXT: vrev64.16 q8, q8 2397; SOFT-NEXT: vadd.i16 q8, q8, q8 2398; SOFT-NEXT: vrev64.16 q8, q8 2399; SOFT-NEXT: vmov r1, r0, d16 2400; SOFT-NEXT: vmov r3, r2, d17 2401; SOFT-NEXT: bl test_v2i64_v8i16_helper 2402; SOFT-NEXT: vmov d17, r3, r2 2403; SOFT-NEXT: vmov d16, r1, r0 2404; SOFT-NEXT: vadd.i64 q8, q8, q8 2405; SOFT-NEXT: vst1.64 {d16, d17}, [r4] 2406; SOFT-NEXT: pop {r4, pc} 2407; 2408; HARD-LABEL: test_v2i64_v8i16: 2409; HARD: @ %bb.0: 2410; HARD-NEXT: .save {r4, lr} 2411; HARD-NEXT: push {r4, lr} 2412; HARD-NEXT: vld1.64 {d16, d17}, [r0] 2413; HARD-NEXT: mov r4, r1 2414; HARD-NEXT: vrev64.16 q8, q8 2415; HARD-NEXT: vadd.i16 q8, q8, q8 2416; HARD-NEXT: vrev64.16 q0, q8 2417; HARD-NEXT: bl test_v2i64_v8i16_helper 2418; HARD-NEXT: vadd.i64 q8, q0, q0 2419; HARD-NEXT: vst1.64 {d16, d17}, [r4] 2420; HARD-NEXT: pop {r4, pc} 
2421 %1 = load <8 x i16>, ptr %p 2422 %2 = add <8 x i16> %1, %1 2423 %3 = call <2 x i64> @test_v2i64_v8i16_helper(<8 x i16> %2) 2424 %4 = add <2 x i64> %3, %3 2425 store <2 x i64> %4, ptr %q 2426 ret void 2427} 2428 2429declare <2 x i64> @test_v2i64_v16i8_helper(<16 x i8> %p) 2430define void @test_v2i64_v16i8(ptr %p, ptr %q) { 2431; SOFT-LABEL: test_v2i64_v16i8: 2432; SOFT: @ %bb.0: 2433; SOFT-NEXT: .save {r4, lr} 2434; SOFT-NEXT: push {r4, lr} 2435; SOFT-NEXT: vld1.64 {d16, d17}, [r0] 2436; SOFT-NEXT: mov r4, r1 2437; SOFT-NEXT: vrev64.8 q8, q8 2438; SOFT-NEXT: vadd.i8 q8, q8, q8 2439; SOFT-NEXT: vrev64.8 q8, q8 2440; SOFT-NEXT: vmov r1, r0, d16 2441; SOFT-NEXT: vmov r3, r2, d17 2442; SOFT-NEXT: bl test_v2i64_v16i8_helper 2443; SOFT-NEXT: vmov d17, r3, r2 2444; SOFT-NEXT: vmov d16, r1, r0 2445; SOFT-NEXT: vadd.i64 q8, q8, q8 2446; SOFT-NEXT: vst1.64 {d16, d17}, [r4] 2447; SOFT-NEXT: pop {r4, pc} 2448; 2449; HARD-LABEL: test_v2i64_v16i8: 2450; HARD: @ %bb.0: 2451; HARD-NEXT: .save {r4, lr} 2452; HARD-NEXT: push {r4, lr} 2453; HARD-NEXT: vld1.64 {d16, d17}, [r0] 2454; HARD-NEXT: mov r4, r1 2455; HARD-NEXT: vrev64.8 q8, q8 2456; HARD-NEXT: vadd.i8 q8, q8, q8 2457; HARD-NEXT: vrev64.8 q0, q8 2458; HARD-NEXT: bl test_v2i64_v16i8_helper 2459; HARD-NEXT: vadd.i64 q8, q0, q0 2460; HARD-NEXT: vst1.64 {d16, d17}, [r4] 2461; HARD-NEXT: pop {r4, pc} 2462 %1 = load <16 x i8>, ptr %p 2463 %2 = add <16 x i8> %1, %1 2464 %3 = call <2 x i64> @test_v2i64_v16i8_helper(<16 x i8> %2) 2465 %4 = add <2 x i64> %3, %3 2466 store <2 x i64> %4, ptr %q 2467 ret void 2468} 2469 2470declare <4 x float> @test_v4f32_f128_helper(fp128 %p) 2471define void @test_v4f32_f128(ptr %p, ptr %q) { 2472; SOFT-LABEL: test_v4f32_f128: 2473; SOFT: @ %bb.0: 2474; SOFT-NEXT: .save {r4, r5, r11, lr} 2475; SOFT-NEXT: push {r4, r5, r11, lr} 2476; SOFT-NEXT: .pad #16 2477; SOFT-NEXT: sub sp, sp, #16 2478; SOFT-NEXT: ldr r4, [r0] 2479; SOFT-NEXT: mov r5, r1 2480; SOFT-NEXT: ldmib r0, {r1, r2, r3} 2481; SOFT-NEXT: 
mov r0, r4 2482; SOFT-NEXT: str r4, [sp] 2483; SOFT-NEXT: stmib sp, {r1, r2, r3} 2484; SOFT-NEXT: bl __addtf3 2485; SOFT-NEXT: bl test_v4f32_f128_helper 2486; SOFT-NEXT: vmov d17, r3, r2 2487; SOFT-NEXT: vmov d16, r1, r0 2488; SOFT-NEXT: vrev64.32 q8, q8 2489; SOFT-NEXT: vadd.f32 q8, q8, q8 2490; SOFT-NEXT: vrev64.32 q8, q8 2491; SOFT-NEXT: vst1.64 {d16, d17}, [r5] 2492; SOFT-NEXT: add sp, sp, #16 2493; SOFT-NEXT: pop {r4, r5, r11, pc} 2494; 2495; HARD-LABEL: test_v4f32_f128: 2496; HARD: @ %bb.0: 2497; HARD-NEXT: .save {r4, r5, r11, lr} 2498; HARD-NEXT: push {r4, r5, r11, lr} 2499; HARD-NEXT: .pad #16 2500; HARD-NEXT: sub sp, sp, #16 2501; HARD-NEXT: ldr r4, [r0] 2502; HARD-NEXT: mov r5, r1 2503; HARD-NEXT: ldmib r0, {r1, r2, r3} 2504; HARD-NEXT: mov r0, r4 2505; HARD-NEXT: str r4, [sp] 2506; HARD-NEXT: stmib sp, {r1, r2, r3} 2507; HARD-NEXT: bl __addtf3 2508; HARD-NEXT: bl test_v4f32_f128_helper 2509; HARD-NEXT: vrev64.32 q8, q0 2510; HARD-NEXT: vadd.f32 q8, q8, q8 2511; HARD-NEXT: vrev64.32 q8, q8 2512; HARD-NEXT: vst1.64 {d16, d17}, [r5] 2513; HARD-NEXT: add sp, sp, #16 2514; HARD-NEXT: pop {r4, r5, r11, pc} 2515 %1 = load fp128, ptr %p 2516 %2 = fadd fp128 %1, %1 2517 %3 = call <4 x float> @test_v4f32_f128_helper(fp128 %2) 2518 %4 = fadd <4 x float> %3, %3 2519 store <4 x float> %4, ptr %q 2520 ret void 2521} 2522 2523declare <4 x float> @test_v4f32_v2f64_helper(<2 x double> %p) 2524define void @test_v4f32_v2f64(ptr %p, ptr %q) { 2525; SOFT-LABEL: test_v4f32_v2f64: 2526; SOFT: @ %bb.0: 2527; SOFT-NEXT: .save {r4, lr} 2528; SOFT-NEXT: push {r4, lr} 2529; SOFT-NEXT: vld1.64 {d16, d17}, [r0] 2530; SOFT-NEXT: mov r4, r1 2531; SOFT-NEXT: vadd.f64 d18, d16, d16 2532; SOFT-NEXT: vadd.f64 d16, d17, d17 2533; SOFT-NEXT: vmov r1, r0, d18 2534; SOFT-NEXT: vmov r3, r2, d16 2535; SOFT-NEXT: bl test_v4f32_v2f64_helper 2536; SOFT-NEXT: vmov d17, r3, r2 2537; SOFT-NEXT: vmov d16, r1, r0 2538; SOFT-NEXT: vrev64.32 q8, q8 2539; SOFT-NEXT: vadd.f32 q8, q8, q8 2540; SOFT-NEXT: 
vrev64.32 q8, q8 2541; SOFT-NEXT: vst1.64 {d16, d17}, [r4] 2542; SOFT-NEXT: pop {r4, pc} 2543; 2544; HARD-LABEL: test_v4f32_v2f64: 2545; HARD: @ %bb.0: 2546; HARD-NEXT: .save {r4, lr} 2547; HARD-NEXT: push {r4, lr} 2548; HARD-NEXT: vld1.64 {d16, d17}, [r0] 2549; HARD-NEXT: mov r4, r1 2550; HARD-NEXT: vadd.f64 d1, d17, d17 2551; HARD-NEXT: vadd.f64 d0, d16, d16 2552; HARD-NEXT: bl test_v4f32_v2f64_helper 2553; HARD-NEXT: vrev64.32 q8, q0 2554; HARD-NEXT: vadd.f32 q8, q8, q8 2555; HARD-NEXT: vrev64.32 q8, q8 2556; HARD-NEXT: vst1.64 {d16, d17}, [r4] 2557; HARD-NEXT: pop {r4, pc} 2558 %1 = load <2 x double>, ptr %p 2559 %2 = fadd <2 x double> %1, %1 2560 %3 = call <4 x float> @test_v4f32_v2f64_helper(<2 x double> %2) 2561 %4 = fadd <4 x float> %3, %3 2562 store <4 x float> %4, ptr %q 2563 ret void 2564} 2565 2566declare <4 x float> @test_v4f32_v2i64_helper(<2 x i64> %p) 2567define void @test_v4f32_v2i64(ptr %p, ptr %q) { 2568; SOFT-LABEL: test_v4f32_v2i64: 2569; SOFT: @ %bb.0: 2570; SOFT-NEXT: .save {r4, lr} 2571; SOFT-NEXT: push {r4, lr} 2572; SOFT-NEXT: vld1.64 {d16, d17}, [r0] 2573; SOFT-NEXT: mov r4, r1 2574; SOFT-NEXT: vadd.i64 q8, q8, q8 2575; SOFT-NEXT: vmov r1, r0, d16 2576; SOFT-NEXT: vmov r3, r2, d17 2577; SOFT-NEXT: bl test_v4f32_v2i64_helper 2578; SOFT-NEXT: vmov d17, r3, r2 2579; SOFT-NEXT: vmov d16, r1, r0 2580; SOFT-NEXT: vrev64.32 q8, q8 2581; SOFT-NEXT: vadd.f32 q8, q8, q8 2582; SOFT-NEXT: vrev64.32 q8, q8 2583; SOFT-NEXT: vst1.64 {d16, d17}, [r4] 2584; SOFT-NEXT: pop {r4, pc} 2585; 2586; HARD-LABEL: test_v4f32_v2i64: 2587; HARD: @ %bb.0: 2588; HARD-NEXT: .save {r4, lr} 2589; HARD-NEXT: push {r4, lr} 2590; HARD-NEXT: vld1.64 {d16, d17}, [r0] 2591; HARD-NEXT: mov r4, r1 2592; HARD-NEXT: vadd.i64 q0, q8, q8 2593; HARD-NEXT: bl test_v4f32_v2i64_helper 2594; HARD-NEXT: vrev64.32 q8, q0 2595; HARD-NEXT: vadd.f32 q8, q8, q8 2596; HARD-NEXT: vrev64.32 q8, q8 2597; HARD-NEXT: vst1.64 {d16, d17}, [r4] 2598; HARD-NEXT: pop {r4, pc} 2599 %1 = load <2 x i64>, ptr 
%p 2600 %2 = add <2 x i64> %1, %1 2601 %3 = call <4 x float> @test_v4f32_v2i64_helper(<2 x i64> %2) 2602 %4 = fadd <4 x float> %3, %3 2603 store <4 x float> %4, ptr %q 2604 ret void 2605} 2606 2607declare <4 x float> @test_v4f32_v4i32_helper(<4 x i32> %p) 2608define void @test_v4f32_v4i32(ptr %p, ptr %q) { 2609; SOFT-LABEL: test_v4f32_v4i32: 2610; SOFT: @ %bb.0: 2611; SOFT-NEXT: .save {r4, lr} 2612; SOFT-NEXT: push {r4, lr} 2613; SOFT-NEXT: vld1.64 {d16, d17}, [r0] 2614; SOFT-NEXT: mov r4, r1 2615; SOFT-NEXT: vrev64.32 q8, q8 2616; SOFT-NEXT: vadd.i32 q8, q8, q8 2617; SOFT-NEXT: vrev64.32 q8, q8 2618; SOFT-NEXT: vmov r1, r0, d16 2619; SOFT-NEXT: vmov r3, r2, d17 2620; SOFT-NEXT: bl test_v4f32_v4i32_helper 2621; SOFT-NEXT: vmov d17, r3, r2 2622; SOFT-NEXT: vmov d16, r1, r0 2623; SOFT-NEXT: vrev64.32 q8, q8 2624; SOFT-NEXT: vadd.f32 q8, q8, q8 2625; SOFT-NEXT: vrev64.32 q8, q8 2626; SOFT-NEXT: vst1.64 {d16, d17}, [r4] 2627; SOFT-NEXT: pop {r4, pc} 2628; 2629; HARD-LABEL: test_v4f32_v4i32: 2630; HARD: @ %bb.0: 2631; HARD-NEXT: .save {r4, lr} 2632; HARD-NEXT: push {r4, lr} 2633; HARD-NEXT: vld1.64 {d16, d17}, [r0] 2634; HARD-NEXT: mov r4, r1 2635; HARD-NEXT: vrev64.32 q8, q8 2636; HARD-NEXT: vadd.i32 q8, q8, q8 2637; HARD-NEXT: vrev64.32 q0, q8 2638; HARD-NEXT: bl test_v4f32_v4i32_helper 2639; HARD-NEXT: vrev64.32 q8, q0 2640; HARD-NEXT: vadd.f32 q8, q8, q8 2641; HARD-NEXT: vrev64.32 q8, q8 2642; HARD-NEXT: vst1.64 {d16, d17}, [r4] 2643; HARD-NEXT: pop {r4, pc} 2644 %1 = load <4 x i32>, ptr %p 2645 %2 = add <4 x i32> %1, %1 2646 %3 = call <4 x float> @test_v4f32_v4i32_helper(<4 x i32> %2) 2647 %4 = fadd <4 x float> %3, %3 2648 store <4 x float> %4, ptr %q 2649 ret void 2650} 2651 2652declare <4 x float> @test_v4f32_v8i16_helper(<8 x i16> %p) 2653define void @test_v4f32_v8i16(ptr %p, ptr %q) { 2654; SOFT-LABEL: test_v4f32_v8i16: 2655; SOFT: @ %bb.0: 2656; SOFT-NEXT: .save {r4, lr} 2657; SOFT-NEXT: push {r4, lr} 2658; SOFT-NEXT: vld1.64 {d16, d17}, [r0] 2659; SOFT-NEXT: 
mov r4, r1 2660; SOFT-NEXT: vrev64.16 q8, q8 2661; SOFT-NEXT: vadd.i16 q8, q8, q8 2662; SOFT-NEXT: vrev64.16 q8, q8 2663; SOFT-NEXT: vmov r1, r0, d16 2664; SOFT-NEXT: vmov r3, r2, d17 2665; SOFT-NEXT: bl test_v4f32_v8i16_helper 2666; SOFT-NEXT: vmov d17, r3, r2 2667; SOFT-NEXT: vmov d16, r1, r0 2668; SOFT-NEXT: vrev64.32 q8, q8 2669; SOFT-NEXT: vadd.f32 q8, q8, q8 2670; SOFT-NEXT: vrev64.32 q8, q8 2671; SOFT-NEXT: vst1.64 {d16, d17}, [r4] 2672; SOFT-NEXT: pop {r4, pc} 2673; 2674; HARD-LABEL: test_v4f32_v8i16: 2675; HARD: @ %bb.0: 2676; HARD-NEXT: .save {r4, lr} 2677; HARD-NEXT: push {r4, lr} 2678; HARD-NEXT: vld1.64 {d16, d17}, [r0] 2679; HARD-NEXT: mov r4, r1 2680; HARD-NEXT: vrev64.16 q8, q8 2681; HARD-NEXT: vadd.i16 q8, q8, q8 2682; HARD-NEXT: vrev64.16 q0, q8 2683; HARD-NEXT: bl test_v4f32_v8i16_helper 2684; HARD-NEXT: vrev64.32 q8, q0 2685; HARD-NEXT: vadd.f32 q8, q8, q8 2686; HARD-NEXT: vrev64.32 q8, q8 2687; HARD-NEXT: vst1.64 {d16, d17}, [r4] 2688; HARD-NEXT: pop {r4, pc} 2689 %1 = load <8 x i16>, ptr %p 2690 %2 = add <8 x i16> %1, %1 2691 %3 = call <4 x float> @test_v4f32_v8i16_helper(<8 x i16> %2) 2692 %4 = fadd <4 x float> %3, %3 2693 store <4 x float> %4, ptr %q 2694 ret void 2695} 2696 2697declare <4 x float> @test_v4f32_v16i8_helper(<16 x i8> %p) 2698define void @test_v4f32_v16i8(ptr %p, ptr %q) { 2699; SOFT-LABEL: test_v4f32_v16i8: 2700; SOFT: @ %bb.0: 2701; SOFT-NEXT: .save {r4, lr} 2702; SOFT-NEXT: push {r4, lr} 2703; SOFT-NEXT: vld1.64 {d16, d17}, [r0] 2704; SOFT-NEXT: mov r4, r1 2705; SOFT-NEXT: vrev64.8 q8, q8 2706; SOFT-NEXT: vadd.i8 q8, q8, q8 2707; SOFT-NEXT: vrev64.8 q8, q8 2708; SOFT-NEXT: vmov r1, r0, d16 2709; SOFT-NEXT: vmov r3, r2, d17 2710; SOFT-NEXT: bl test_v4f32_v16i8_helper 2711; SOFT-NEXT: vmov d17, r3, r2 2712; SOFT-NEXT: vmov d16, r1, r0 2713; SOFT-NEXT: vrev64.32 q8, q8 2714; SOFT-NEXT: vadd.f32 q8, q8, q8 2715; SOFT-NEXT: vrev64.32 q8, q8 2716; SOFT-NEXT: vst1.64 {d16, d17}, [r4] 2717; SOFT-NEXT: pop {r4, pc} 2718; 2719; 
HARD-LABEL: test_v4f32_v16i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.8 q8, q8
; HARD-NEXT:    vadd.i8 q8, q8, q8
; HARD-NEXT:    vrev64.8 q0, q8
; HARD-NEXT:    bl test_v4f32_v16i8_helper
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <16 x i8>, ptr %p
  %2 = add <16 x i8> %1, %1
  %3 = call <4 x float> @test_v4f32_v16i8_helper(<16 x i8> %2)
  %4 = fadd <4 x float> %3, %3
  store <4 x float> %4, ptr %q
  ret void
}

; NOTE(review): the check lines in this file are autogenerated by
; utils/update_llc_test_checks.py (see the note at the top of the file);
; regenerate them with that script rather than editing them by hand.
;
; Shared pattern of every test below: load a value, double it, pass it to an
; external helper that takes/returns a different (bit-equivalent, 128-bit)
; type, double the helper's result, and store it.  The vrev64 lane swaps
; around each call appear to be the big-endian behavior being checked --
; confirm against the full file before relying on that.

; fp128 argument, <4 x i32> result.
declare <4 x i32> @test_v4i32_f128_helper(fp128 %p)
define void @test_v4i32_f128(ptr %p, ptr %q) {
; SOFT-LABEL: test_v4i32_f128:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, r5, r11, lr}
; SOFT-NEXT:    push {r4, r5, r11, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    ldr r4, [r0]
; SOFT-NEXT:    mov r5, r1
; SOFT-NEXT:    ldmib r0, {r1, r2, r3}
; SOFT-NEXT:    mov r0, r4
; SOFT-NEXT:    str r4, [sp]
; SOFT-NEXT:    stmib sp, {r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    bl test_v4i32_f128_helper
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.i32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vst1.64 {d16, d17}, [r5]
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r4, r5, r11, pc}
;
; HARD-LABEL: test_v4i32_f128:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, r5, r11, lr}
; HARD-NEXT:    push {r4, r5, r11, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    ldr r4, [r0]
; HARD-NEXT:    mov r5, r1
; HARD-NEXT:    ldmib r0, {r1, r2, r3}
; HARD-NEXT:    mov r0, r4
; HARD-NEXT:    str r4, [sp]
; HARD-NEXT:    stmib sp, {r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    bl test_v4i32_f128_helper
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.i32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vst1.64 {d16, d17}, [r5]
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r4, r5, r11, pc}
  %1 = load fp128, ptr %p
  %2 = fadd fp128 %1, %1
  %3 = call <4 x i32> @test_v4i32_f128_helper(fp128 %2)
  %4 = add <4 x i32> %3, %3
  store <4 x i32> %4, ptr %q
  ret void
}

; <2 x double> argument, <4 x i32> result.
declare <4 x i32> @test_v4i32_v2f64_helper(<2 x double> %p)
define void @test_v4i32_v2f64(ptr %p, ptr %q) {
; SOFT-LABEL: test_v4i32_v2f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vadd.f64 d18, d16, d16
; SOFT-NEXT:    vadd.f64 d16, d17, d17
; SOFT-NEXT:    vmov r1, r0, d18
; SOFT-NEXT:    vmov r3, r2, d16
; SOFT-NEXT:    bl test_v4i32_v2f64_helper
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.i32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v4i32_v2f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vadd.f64 d1, d17, d17
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bl test_v4i32_v2f64_helper
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.i32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <2 x double>, ptr %p
  %2 = fadd <2 x double> %1, %1
  %3 = call <4 x i32> @test_v4i32_v2f64_helper(<2 x double> %2)
  %4 = add <4 x i32> %3, %3
  store <4 x i32> %4, ptr %q
  ret void
}

; <2 x i64> argument, <4 x i32> result.
declare <4 x i32> @test_v4i32_v2i64_helper(<2 x i64> %p)
define void @test_v4i32_v2i64(ptr %p, ptr %q) {
; SOFT-LABEL: test_v4i32_v2i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vadd.i64 q8, q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bl test_v4i32_v2i64_helper
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.i32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v4i32_v2i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vadd.i64 q0, q8, q8
; HARD-NEXT:    bl test_v4i32_v2i64_helper
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.i32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <2 x i64>, ptr %p
  %2 = add <2 x i64> %1, %1
  %3 = call <4 x i32> @test_v4i32_v2i64_helper(<2 x i64> %2)
  %4 = add <4 x i32> %3, %3
  store <4 x i32> %4, ptr %q
  ret void
}

; <4 x float> argument, <4 x i32> result.
declare <4 x i32> @test_v4i32_v4f32_helper(<4 x float> %p)
define void @test_v4i32_v4f32(ptr %p, ptr %q) {
; SOFT-LABEL: test_v4i32_v4f32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bl test_v4i32_v4f32_helper
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.i32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v4i32_v4f32:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q0, q8
; HARD-NEXT:    bl test_v4i32_v4f32_helper
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.i32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <4 x float>, ptr %p
  %2 = fadd <4 x float> %1, %1
  %3 = call <4 x i32> @test_v4i32_v4f32_helper(<4 x float> %2)
  %4 = add <4 x i32> %3, %3
  store <4 x i32> %4, ptr %q
  ret void
}

; <8 x i16> argument, <4 x i32> result.
declare <4 x i32> @test_v4i32_v8i16_helper(<8 x i16> %p)
define void @test_v4i32_v8i16(ptr %p, ptr %q) {
; SOFT-LABEL: test_v4i32_v8i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vadd.i16 q8, q8, q8
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bl test_v4i32_v8i16_helper
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.i32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v4i32_v8i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.16 q8, q8
; HARD-NEXT:    vadd.i16 q8, q8, q8
; HARD-NEXT:    vrev64.16 q0, q8
; HARD-NEXT:    bl test_v4i32_v8i16_helper
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.i32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <8 x i16>, ptr %p
  %2 = add <8 x i16> %1, %1
  %3 = call <4 x i32> @test_v4i32_v8i16_helper(<8 x i16> %2)
  %4 = add <4 x i32> %3, %3
  store <4 x i32> %4, ptr %q
  ret void
}

; <16 x i8> argument, <4 x i32> result.
declare <4 x i32> @test_v4i32_v16i8_helper(<16 x i8> %p)
define void @test_v4i32_v16i8(ptr %p, ptr %q) {
; SOFT-LABEL: test_v4i32_v16i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vadd.i8 q8, q8, q8
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bl test_v4i32_v16i8_helper
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.i32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v4i32_v16i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.8 q8, q8
; HARD-NEXT:    vadd.i8 q8, q8, q8
; HARD-NEXT:    vrev64.8 q0, q8
; HARD-NEXT:    bl test_v4i32_v16i8_helper
; HARD-NEXT:    vrev64.32 q8, q0
; HARD-NEXT:    vadd.i32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <16 x i8>, ptr %p
  %2 = add <16 x i8> %1, %1
  %3 = call <4 x i32> @test_v4i32_v16i8_helper(<16 x i8> %2)
  %4 = add <4 x i32> %3, %3
  store <4 x i32> %4, ptr %q
  ret void
}

; fp128 argument, <8 x i16> result.
declare <8 x i16> @test_v8i16_f128_helper(fp128 %p)
define void @test_v8i16_f128(ptr %p, ptr %q) {
; SOFT-LABEL: test_v8i16_f128:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, r5, r11, lr}
; SOFT-NEXT:    push {r4, r5, r11, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    ldr r4, [r0]
; SOFT-NEXT:    mov r5, r1
; SOFT-NEXT:    ldmib r0, {r1, r2, r3}
; SOFT-NEXT:    mov r0, r4
; SOFT-NEXT:    str r4, [sp]
; SOFT-NEXT:    stmib sp, {r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    bl test_v8i16_f128_helper
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vadd.i16 q8, q8, q8
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vst1.64 {d16, d17}, [r5]
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r4, r5, r11, pc}
;
; HARD-LABEL: test_v8i16_f128:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, r5, r11, lr}
; HARD-NEXT:    push {r4, r5, r11, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    ldr r4, [r0]
; HARD-NEXT:    mov r5, r1
; HARD-NEXT:    ldmib r0, {r1, r2, r3}
; HARD-NEXT:    mov r0, r4
; HARD-NEXT:    str r4, [sp]
; HARD-NEXT:    stmib sp, {r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    bl test_v8i16_f128_helper
; HARD-NEXT:    vrev64.16 q8, q0
; HARD-NEXT:    vadd.i16 q8, q8, q8
; HARD-NEXT:    vrev64.16 q8, q8
; HARD-NEXT:    vst1.64 {d16, d17}, [r5]
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r4, r5, r11, pc}
  %1 = load fp128, ptr %p
  %2 = fadd fp128 %1, %1
  %3 = call <8 x i16> @test_v8i16_f128_helper(fp128 %2)
  %4 = add <8 x i16> %3, %3
  store <8 x i16> %4, ptr %q
  ret void
}

; <2 x double> argument, <8 x i16> result.
declare <8 x i16> @test_v8i16_v2f64_helper(<2 x double> %p)
define void @test_v8i16_v2f64(ptr %p, ptr %q) {
; SOFT-LABEL: test_v8i16_v2f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vadd.f64 d18, d16, d16
; SOFT-NEXT:    vadd.f64 d16, d17, d17
; SOFT-NEXT:    vmov r1, r0, d18
; SOFT-NEXT:    vmov r3, r2, d16
; SOFT-NEXT:    bl test_v8i16_v2f64_helper
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vadd.i16 q8, q8, q8
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v8i16_v2f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vadd.f64 d1, d17, d17
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bl test_v8i16_v2f64_helper
; HARD-NEXT:    vrev64.16 q8, q0
; HARD-NEXT:    vadd.i16 q8, q8, q8
; HARD-NEXT:    vrev64.16 q8, q8
; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <2 x double>, ptr %p
  %2 = fadd <2 x double> %1, %1
  %3 = call <8 x i16> @test_v8i16_v2f64_helper(<2 x double> %2)
  %4 = add <8 x i16> %3, %3
  store <8 x i16> %4, ptr %q
  ret void
}

; <2 x i64> argument, <8 x i16> result.
declare <8 x i16> @test_v8i16_v2i64_helper(<2 x i64> %p)
define void @test_v8i16_v2i64(ptr %p, ptr %q) {
; SOFT-LABEL: test_v8i16_v2i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vadd.i64 q8, q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bl test_v8i16_v2i64_helper
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vadd.i16 q8, q8, q8
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v8i16_v2i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vadd.i64 q0, q8, q8
; HARD-NEXT:    bl test_v8i16_v2i64_helper
; HARD-NEXT:    vrev64.16 q8, q0
; HARD-NEXT:    vadd.i16 q8, q8, q8
; HARD-NEXT:    vrev64.16 q8, q8
; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <2 x i64>, ptr %p
  %2 = add <2 x i64> %1, %1
  %3 = call <8 x i16> @test_v8i16_v2i64_helper(<2 x i64> %2)
  %4 = add <8 x i16> %3, %3
  store <8 x i16> %4, ptr %q
  ret void
}

; <4 x float> argument, <8 x i16> result.
declare <8 x i16> @test_v8i16_v4f32_helper(<4 x float> %p)
define void @test_v8i16_v4f32(ptr %p, ptr %q) {
; SOFT-LABEL: test_v8i16_v4f32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bl test_v8i16_v4f32_helper
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vadd.i16 q8, q8, q8
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v8i16_v4f32:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q0, q8
; HARD-NEXT:    bl test_v8i16_v4f32_helper
; HARD-NEXT:    vrev64.16 q8, q0
; HARD-NEXT:    vadd.i16 q8, q8, q8
; HARD-NEXT:    vrev64.16 q8, q8
; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <4 x float>, ptr %p
  %2 = fadd <4 x float> %1, %1
  %3 = call <8 x i16> @test_v8i16_v4f32_helper(<4 x float> %2)
  %4 = add <8 x i16> %3, %3
  store <8 x i16> %4, ptr %q
  ret void
}

; <4 x i32> argument, <8 x i16> result.
declare <8 x i16> @test_v8i16_v4i32_helper(<4 x i32> %p)
define void @test_v8i16_v4i32(ptr %p, ptr %q) {
; SOFT-LABEL: test_v8i16_v4i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.i32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bl test_v8i16_v4i32_helper
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vadd.i16 q8, q8, q8
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v8i16_v4i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vadd.i32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q0, q8
; HARD-NEXT:    bl test_v8i16_v4i32_helper
; HARD-NEXT:    vrev64.16 q8, q0
; HARD-NEXT:    vadd.i16 q8, q8, q8
; HARD-NEXT:    vrev64.16 q8, q8
; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <4 x i32>, ptr %p
  %2 = add <4 x i32> %1, %1
  %3 = call <8 x i16> @test_v8i16_v4i32_helper(<4 x i32> %2)
  %4 = add <8 x i16> %3, %3
  store <8 x i16> %4, ptr %q
  ret void
}

; <16 x i8> argument, <8 x i16> result.
declare <8 x i16> @test_v8i16_v16i8_helper(<16 x i8> %p)
define void @test_v8i16_v16i8(ptr %p, ptr %q) {
; SOFT-LABEL: test_v8i16_v16i8:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vadd.i8 q8, q8, q8
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bl test_v8i16_v16i8_helper
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vadd.i16 q8, q8, q8
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v8i16_v16i8:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.8 q8, q8
; HARD-NEXT:    vadd.i8 q8, q8, q8
; HARD-NEXT:    vrev64.8 q0, q8
; HARD-NEXT:    bl test_v8i16_v16i8_helper
; HARD-NEXT:    vrev64.16 q8, q0
; HARD-NEXT:    vadd.i16 q8, q8, q8
; HARD-NEXT:    vrev64.16 q8, q8
; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <16 x i8>, ptr %p
  %2 = add <16 x i8> %1, %1
  %3 = call <8 x i16> @test_v8i16_v16i8_helper(<16 x i8> %2)
  %4 = add <8 x i16> %3, %3
  store <8 x i16> %4, ptr %q
  ret void
}

; fp128 argument, <16 x i8> result.
declare <16 x i8> @test_v16i8_f128_helper(fp128 %p)
define void @test_v16i8_f128(ptr %p, ptr %q) {
; SOFT-LABEL: test_v16i8_f128:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, r5, r11, lr}
; SOFT-NEXT:    push {r4, r5, r11, lr}
; SOFT-NEXT:    .pad #16
; SOFT-NEXT:    sub sp, sp, #16
; SOFT-NEXT:    ldr r4, [r0]
; SOFT-NEXT:    mov r5, r1
; SOFT-NEXT:    ldmib r0, {r1, r2, r3}
; SOFT-NEXT:    mov r0, r4
; SOFT-NEXT:    str r4, [sp]
; SOFT-NEXT:    stmib sp, {r1, r2, r3}
; SOFT-NEXT:    bl __addtf3
; SOFT-NEXT:    bl test_v16i8_f128_helper
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vadd.i8 q8, q8, q8
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vst1.64 {d16, d17}, [r5]
; SOFT-NEXT:    add sp, sp, #16
; SOFT-NEXT:    pop {r4, r5, r11, pc}
;
; HARD-LABEL: test_v16i8_f128:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, r5, r11, lr}
; HARD-NEXT:    push {r4, r5, r11, lr}
; HARD-NEXT:    .pad #16
; HARD-NEXT:    sub sp, sp, #16
; HARD-NEXT:    ldr r4, [r0]
; HARD-NEXT:    mov r5, r1
; HARD-NEXT:    ldmib r0, {r1, r2, r3}
; HARD-NEXT:    mov r0, r4
; HARD-NEXT:    str r4, [sp]
; HARD-NEXT:    stmib sp, {r1, r2, r3}
; HARD-NEXT:    bl __addtf3
; HARD-NEXT:    bl test_v16i8_f128_helper
; HARD-NEXT:    vrev64.8 q8, q0
; HARD-NEXT:    vadd.i8 q8, q8, q8
; HARD-NEXT:    vrev64.8 q8, q8
; HARD-NEXT:    vst1.64 {d16, d17}, [r5]
; HARD-NEXT:    add sp, sp, #16
; HARD-NEXT:    pop {r4, r5, r11, pc}
  %1 = load fp128, ptr %p
  %2 = fadd fp128 %1, %1
  %3 = call <16 x i8> @test_v16i8_f128_helper(fp128 %2)
  %4 = add <16 x i8> %3, %3
  store <16 x i8> %4, ptr %q
  ret void
}

; <2 x double> argument, <16 x i8> result.
declare <16 x i8> @test_v16i8_v2f64_helper(<2 x double> %p)
define void @test_v16i8_v2f64(ptr %p, ptr %q) {
; SOFT-LABEL: test_v16i8_v2f64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vadd.f64 d18, d16, d16
; SOFT-NEXT:    vadd.f64 d16, d17, d17
; SOFT-NEXT:    vmov r1, r0, d18
; SOFT-NEXT:    vmov r3, r2, d16
; SOFT-NEXT:    bl test_v16i8_v2f64_helper
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vadd.i8 q8, q8, q8
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v16i8_v2f64:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vadd.f64 d1, d17, d17
; HARD-NEXT:    vadd.f64 d0, d16, d16
; HARD-NEXT:    bl test_v16i8_v2f64_helper
; HARD-NEXT:    vrev64.8 q8, q0
; HARD-NEXT:    vadd.i8 q8, q8, q8
; HARD-NEXT:    vrev64.8 q8, q8
; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <2 x double>, ptr %p
  %2 = fadd <2 x double> %1, %1
  %3 = call <16 x i8> @test_v16i8_v2f64_helper(<2 x double> %2)
  %4 = add <16 x i8> %3, %3
  store <16 x i8> %4, ptr %q
  ret void
}

; <2 x i64> argument, <16 x i8> result.
declare <16 x i8> @test_v16i8_v2i64_helper(<2 x i64> %p)
define void @test_v16i8_v2i64(ptr %p, ptr %q) {
; SOFT-LABEL: test_v16i8_v2i64:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vadd.i64 q8, q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bl test_v16i8_v2i64_helper
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vadd.i8 q8, q8, q8
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v16i8_v2i64:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vadd.i64 q0, q8, q8
; HARD-NEXT:    bl test_v16i8_v2i64_helper
; HARD-NEXT:    vrev64.8 q8, q0
; HARD-NEXT:    vadd.i8 q8, q8, q8
; HARD-NEXT:    vrev64.8 q8, q8
; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <2 x i64>, ptr %p
  %2 = add <2 x i64> %1, %1
  %3 = call <16 x i8> @test_v16i8_v2i64_helper(<2 x i64> %2)
  %4 = add <16 x i8> %3, %3
  store <16 x i8> %4, ptr %q
  ret void
}

; <4 x float> argument, <16 x i8> result.
declare <16 x i8> @test_v16i8_v4f32_helper(<4 x float> %p)
define void @test_v16i8_v4f32(ptr %p, ptr %q) {
; SOFT-LABEL: test_v16i8_v4f32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.f32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bl test_v16i8_v4f32_helper
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vadd.i8 q8, q8, q8
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v16i8_v4f32:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vadd.f32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q0, q8
; HARD-NEXT:    bl test_v16i8_v4f32_helper
; HARD-NEXT:    vrev64.8 q8, q0
; HARD-NEXT:    vadd.i8 q8, q8, q8
; HARD-NEXT:    vrev64.8 q8, q8
; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <4 x float>, ptr %p
  %2 = fadd <4 x float> %1, %1
  %3 = call <16 x i8> @test_v16i8_v4f32_helper(<4 x float> %2)
  %4 = add <16 x i8> %3, %3
  store <16 x i8> %4, ptr %q
  ret void
}

; <4 x i32> argument, <16 x i8> result.
declare <16 x i8> @test_v16i8_v4i32_helper(<4 x i32> %p)
define void @test_v16i8_v4i32(ptr %p, ptr %q) {
; SOFT-LABEL: test_v16i8_v4i32:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vadd.i32 q8, q8, q8
; SOFT-NEXT:    vrev64.32 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bl test_v16i8_v4i32_helper
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vadd.i8 q8, q8, q8
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v16i8_v4i32:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.32 q8, q8
; HARD-NEXT:    vadd.i32 q8, q8, q8
; HARD-NEXT:    vrev64.32 q0, q8
; HARD-NEXT:    bl test_v16i8_v4i32_helper
; HARD-NEXT:    vrev64.8 q8, q0
; HARD-NEXT:    vadd.i8 q8, q8, q8
; HARD-NEXT:    vrev64.8 q8, q8
; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <4 x i32>, ptr %p
  %2 = add <4 x i32> %1, %1
  %3 = call <16 x i8> @test_v16i8_v4i32_helper(<4 x i32> %2)
  %4 = add <16 x i8> %3, %3
  store <16 x i8> %4, ptr %q
  ret void
}

; <8 x i16> argument, <16 x i8> result.
declare <16 x i8> @test_v16i8_v8i16_helper(<8 x i16> %p)
define void @test_v16i8_v8i16(ptr %p, ptr %q) {
; SOFT-LABEL: test_v16i8_v8i16:
; SOFT:       @ %bb.0:
; SOFT-NEXT:    .save {r4, lr}
; SOFT-NEXT:    push {r4, lr}
; SOFT-NEXT:    vld1.64 {d16, d17}, [r0]
; SOFT-NEXT:    mov r4, r1
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vadd.i16 q8, q8, q8
; SOFT-NEXT:    vrev64.16 q8, q8
; SOFT-NEXT:    vmov r1, r0, d16
; SOFT-NEXT:    vmov r3, r2, d17
; SOFT-NEXT:    bl test_v16i8_v8i16_helper
; SOFT-NEXT:    vmov d17, r3, r2
; SOFT-NEXT:    vmov d16, r1, r0
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vadd.i8 q8, q8, q8
; SOFT-NEXT:    vrev64.8 q8, q8
; SOFT-NEXT:    vst1.64 {d16, d17}, [r4]
; SOFT-NEXT:    pop {r4, pc}
;
; HARD-LABEL: test_v16i8_v8i16:
; HARD:       @ %bb.0:
; HARD-NEXT:    .save {r4, lr}
; HARD-NEXT:    push {r4, lr}
; HARD-NEXT:    vld1.64 {d16, d17}, [r0]
; HARD-NEXT:    mov r4, r1
; HARD-NEXT:    vrev64.16 q8, q8
; HARD-NEXT:    vadd.i16 q8, q8, q8
; HARD-NEXT:    vrev64.16 q0, q8
; HARD-NEXT:    bl test_v16i8_v8i16_helper
; HARD-NEXT:    vrev64.8 q8, q0
; HARD-NEXT:    vadd.i8 q8, q8, q8
; HARD-NEXT:    vrev64.8 q8, q8
; HARD-NEXT:    vst1.64 {d16, d17}, [r4]
; HARD-NEXT:    pop {r4, pc}
  %1 = load <8 x i16>, ptr %p
  %2 = add <8 x i16> %1, %1
  %3 = call <16 x i8> @test_v16i8_v8i16_helper(<8 x i16> %2)
  %4 = add <16 x i8> %3, %3
  store <16 x i8> %4, ptr %q
  ret void
}