; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -o - %s | FileCheck %s --check-prefixes=CHECK-LE,CHECK-MVE
; RUN: llc -mtriple=thumbebv8.1m.main-none-eabi -mattr=+mve -o - %s | FileCheck %s --check-prefix=CHECK-BE
; RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -o - %s | FileCheck %s --check-prefixes=CHECK-LE,CHECK-FP

; Every function below is compiled three times by the llc invocations above:
; little-endian with +mve, big-endian (thumbeb) with +mve, and little-endian
; with +mve.fp. In the expected output the first 128-bit vector argument
; arrives in r0-r3, the second is loaded from the stack, and the result is
; returned in r0-r3.

; <16 x i8> add: a single vadd.i8 in every configuration. The big-endian
; output additionally applies vrev64.8 to the register-passed operand and to
; the result.
define <16 x i8> @vector_add_i8(<16 x i8> %lhs, <16 x i8> %rhs) {
; CHECK-LE-LABEL: vector_add_i8:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vmov d0, r0, r1
; CHECK-LE-NEXT:    mov r0, sp
; CHECK-LE-NEXT:    vldrw.u32 q1, [r0]
; CHECK-LE-NEXT:    vmov d1, r2, r3
; CHECK-LE-NEXT:    vadd.i8 q0, q0, q1
; CHECK-LE-NEXT:    vmov r0, r1, d0
; CHECK-LE-NEXT:    vmov r2, r3, d1
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: vector_add_i8:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vmov d0, r1, r0
; CHECK-BE-NEXT:    mov r0, sp
; CHECK-BE-NEXT:    vmov d1, r3, r2
; CHECK-BE-NEXT:    vrev64.8 q1, q0
; CHECK-BE-NEXT:    vldrb.u8 q0, [r0]
; CHECK-BE-NEXT:    vadd.i8 q0, q1, q0
; CHECK-BE-NEXT:    vrev64.8 q1, q0
; CHECK-BE-NEXT:    vmov r1, r0, d2
; CHECK-BE-NEXT:    vmov r3, r2, d3
; CHECK-BE-NEXT:    bx lr
entry:
  %sum = add <16 x i8> %lhs, %rhs
  ret <16 x i8> %sum
}

; <8 x i16> add: a single vadd.i16; big-endian output uses vrev64.16 around it.
define <8 x i16> @vector_add_i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LE-LABEL: vector_add_i16:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vmov d0, r0, r1
; CHECK-LE-NEXT:    mov r0, sp
; CHECK-LE-NEXT:    vldrw.u32 q1, [r0]
; CHECK-LE-NEXT:    vmov d1, r2, r3
; CHECK-LE-NEXT:    vadd.i16 q0, q0, q1
; CHECK-LE-NEXT:    vmov r0, r1, d0
; CHECK-LE-NEXT:    vmov r2, r3, d1
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: vector_add_i16:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vmov d0, r1, r0
; CHECK-BE-NEXT:    mov r0, sp
; CHECK-BE-NEXT:    vmov d1, r3, r2
; CHECK-BE-NEXT:    vrev64.16 q1, q0
; CHECK-BE-NEXT:    vldrh.u16 q0, [r0]
; CHECK-BE-NEXT:    vadd.i16 q0, q1, q0
; CHECK-BE-NEXT:    vrev64.16 q1, q0
; CHECK-BE-NEXT:    vmov r1, r0, d2
; CHECK-BE-NEXT:    vmov r3, r2, d3
; CHECK-BE-NEXT:    bx lr
entry:
  %sum = add <8 x i16> %lhs, %rhs
  ret <8 x i16> %sum
}

; <4 x i32> add: a single vadd.i32; big-endian output uses vrev64.32 around it.
define <4 x i32> @vector_add_i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK-LE-LABEL: vector_add_i32:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vmov d0, r0, r1
; CHECK-LE-NEXT:    mov r0, sp
; CHECK-LE-NEXT:    vldrw.u32 q1, [r0]
; CHECK-LE-NEXT:    vmov d1, r2, r3
; CHECK-LE-NEXT:    vadd.i32 q0, q0, q1
; CHECK-LE-NEXT:    vmov r0, r1, d0
; CHECK-LE-NEXT:    vmov r2, r3, d1
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: vector_add_i32:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vmov d0, r1, r0
; CHECK-BE-NEXT:    mov r0, sp
; CHECK-BE-NEXT:    vmov d1, r3, r2
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vldrw.u32 q0, [r0]
; CHECK-BE-NEXT:    vadd.i32 q0, q1, q0
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vmov r1, r0, d2
; CHECK-BE-NEXT:    vmov r3, r2, d3
; CHECK-BE-NEXT:    bx lr
entry:
  %sum = add <4 x i32> %lhs, %rhs
  ret <4 x i32> %sum
}

; <2 x i64> add: the expected output contains no vector add; each lane is an
; adds/adc pair on core registers. The +mve and +mve.fp outputs differ only in
; register usage, hence the separate prefixes.
define <2 x i64> @vector_add_i64(<2 x i64> %lhs, <2 x i64> %rhs) {
; CHECK-MVE-LABEL: vector_add_i64:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    .save {r7, lr}
; CHECK-MVE-NEXT:    push {r7, lr}
; CHECK-MVE-NEXT:    add.w r12, sp, #8
; CHECK-MVE-NEXT:    vldrw.u32 q0, [r12]
; CHECK-MVE-NEXT:    vmov r12, lr, d0
; CHECK-MVE-NEXT:    adds.w r0, r0, r12
; CHECK-MVE-NEXT:    adc.w r1, r1, lr
; CHECK-MVE-NEXT:    vmov r12, lr, d1
; CHECK-MVE-NEXT:    adds.w r2, r2, r12
; CHECK-MVE-NEXT:    adc.w r3, r3, lr
; CHECK-MVE-NEXT:    pop {r7, pc}
;
; CHECK-BE-LABEL: vector_add_i64:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .save {r7, lr}
; CHECK-BE-NEXT:    push {r7, lr}
; CHECK-BE-NEXT:    add.w r12, sp, #8
; CHECK-BE-NEXT:    vldrw.u32 q0, [r12]
; CHECK-BE-NEXT:    vmov r12, lr, d0
; CHECK-BE-NEXT:    adds.w r1, r1, lr
; CHECK-BE-NEXT:    adc.w r0, r0, r12
; CHECK-BE-NEXT:    vmov r12, lr, d1
; CHECK-BE-NEXT:    adds.w r3, r3, lr
; CHECK-BE-NEXT:    adc.w r2, r2, r12
; CHECK-BE-NEXT:    pop {r7, pc}
;
; CHECK-FP-LABEL: vector_add_i64:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    .save {r4, r5, r7, lr}
; CHECK-FP-NEXT:    push {r4, r5, r7, lr}
; CHECK-FP-NEXT:    add.w r12, sp, #16
; CHECK-FP-NEXT:    vldrw.u32 q0, [r12]
; CHECK-FP-NEXT:    vmov r12, lr, d0
; CHECK-FP-NEXT:    vmov r4, r5, d1
; CHECK-FP-NEXT:    adds.w r0, r0, r12
; CHECK-FP-NEXT:    adc.w r1, r1, lr
; CHECK-FP-NEXT:    adds r2, r2, r4
; CHECK-FP-NEXT:    adcs r3, r5
; CHECK-FP-NEXT:    pop {r4, r5, r7, pc}
entry:
  %sum = add <2 x i64> %lhs, %rhs
  ret <2 x i64> %sum
}

; <8 x half> fadd: with +mve only, every lane is converted with __aeabi_h2f,
; added with __aeabi_fadd and converted back with __aeabi_f2h; with +mve.fp
; the expected output is a single vadd.f16.
define <8 x half> @vector_add_f16(<8 x half> %lhs, <8 x half> %rhs) {
; CHECK-MVE-LABEL: vector_add_f16:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    .save {r4, r5, r7, lr}
; CHECK-MVE-NEXT:    push {r4, r5, r7, lr}
; CHECK-MVE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
; CHECK-MVE-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
; CHECK-MVE-NEXT:    vmov d8, r0, r1
; CHECK-MVE-NEXT:    add r0, sp, #64
; CHECK-MVE-NEXT:    vldrw.u32 q6, [r0]
; CHECK-MVE-NEXT:    vmov d9, r2, r3
; CHECK-MVE-NEXT:    vmov.u16 r4, q4[0]
; CHECK-MVE-NEXT:    vmov.u16 r0, q6[0]
; CHECK-MVE-NEXT:    bl __aeabi_h2f
; CHECK-MVE-NEXT:    mov r5, r0
; CHECK-MVE-NEXT:    mov r0, r4
; CHECK-MVE-NEXT:    bl __aeabi_h2f
; CHECK-MVE-NEXT:    mov r1, r5
; CHECK-MVE-NEXT:    bl __aeabi_fadd
; CHECK-MVE-NEXT:    bl __aeabi_f2h
; CHECK-MVE-NEXT:    vmov.16 q5[0], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q6[1]
; CHECK-MVE-NEXT:    vmov.u16 r4, q4[1]
; CHECK-MVE-NEXT:    bl __aeabi_h2f
; CHECK-MVE-NEXT:    mov r5, r0
; CHECK-MVE-NEXT:    mov r0, r4
; CHECK-MVE-NEXT:    bl __aeabi_h2f
; CHECK-MVE-NEXT:    mov r1, r5
; CHECK-MVE-NEXT:    bl __aeabi_fadd
; CHECK-MVE-NEXT:    bl __aeabi_f2h
; CHECK-MVE-NEXT:    vmov.16 q5[1], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q6[2]
; CHECK-MVE-NEXT:    vmov.u16 r4, q4[2]
; CHECK-MVE-NEXT:    bl __aeabi_h2f
; CHECK-MVE-NEXT:    mov r5, r0
; CHECK-MVE-NEXT:    mov r0, r4
; CHECK-MVE-NEXT:    bl __aeabi_h2f
; CHECK-MVE-NEXT:    mov r1, r5
; CHECK-MVE-NEXT:    bl __aeabi_fadd
; CHECK-MVE-NEXT:    bl __aeabi_f2h
; CHECK-MVE-NEXT:    vmov.16 q5[2], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q6[3]
; CHECK-MVE-NEXT:    vmov.u16 r4, q4[3]
; CHECK-MVE-NEXT:    bl __aeabi_h2f
; CHECK-MVE-NEXT:    mov r5, r0
; CHECK-MVE-NEXT:    mov r0, r4
; CHECK-MVE-NEXT:    bl __aeabi_h2f
; CHECK-MVE-NEXT:    mov r1, r5
; CHECK-MVE-NEXT:    bl __aeabi_fadd
; CHECK-MVE-NEXT:    bl __aeabi_f2h
; CHECK-MVE-NEXT:    vmov.16 q5[3], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q6[4]
; CHECK-MVE-NEXT:    vmov.u16 r4, q4[4]
; CHECK-MVE-NEXT:    bl __aeabi_h2f
; CHECK-MVE-NEXT:    mov r5, r0
; CHECK-MVE-NEXT:    mov r0, r4
; CHECK-MVE-NEXT:    bl __aeabi_h2f
; CHECK-MVE-NEXT:    mov r1, r5
; CHECK-MVE-NEXT:    bl __aeabi_fadd
; CHECK-MVE-NEXT:    bl __aeabi_f2h
; CHECK-MVE-NEXT:    vmov.16 q5[4], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q6[5]
; CHECK-MVE-NEXT:    vmov.u16 r4, q4[5]
; CHECK-MVE-NEXT:    bl __aeabi_h2f
; CHECK-MVE-NEXT:    mov r5, r0
; CHECK-MVE-NEXT:    mov r0, r4
; CHECK-MVE-NEXT:    bl __aeabi_h2f
; CHECK-MVE-NEXT:    mov r1, r5
; CHECK-MVE-NEXT:    bl __aeabi_fadd
; CHECK-MVE-NEXT:    bl __aeabi_f2h
; CHECK-MVE-NEXT:    vmov.16 q5[5], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q6[6]
; CHECK-MVE-NEXT:    vmov.u16 r4, q4[6]
; CHECK-MVE-NEXT:    bl __aeabi_h2f
; CHECK-MVE-NEXT:    mov r5, r0
; CHECK-MVE-NEXT:    mov r0, r4
; CHECK-MVE-NEXT:    bl __aeabi_h2f
; CHECK-MVE-NEXT:    mov r1, r5
; CHECK-MVE-NEXT:    bl __aeabi_fadd
; CHECK-MVE-NEXT:    bl __aeabi_f2h
; CHECK-MVE-NEXT:    vmov.16 q5[6], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q6[7]
; CHECK-MVE-NEXT:    vmov.u16 r4, q4[7]
; CHECK-MVE-NEXT:    bl __aeabi_h2f
; CHECK-MVE-NEXT:    mov r5, r0
; CHECK-MVE-NEXT:    mov r0, r4
; CHECK-MVE-NEXT:    bl __aeabi_h2f
; CHECK-MVE-NEXT:    mov r1, r5
; CHECK-MVE-NEXT:    bl __aeabi_fadd
; CHECK-MVE-NEXT:    bl __aeabi_f2h
; CHECK-MVE-NEXT:    vmov.16 q5[7], r0
; CHECK-MVE-NEXT:    vmov r0, r1, d10
; CHECK-MVE-NEXT:    vmov r2, r3, d11
; CHECK-MVE-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
; CHECK-MVE-NEXT:    pop {r4, r5, r7, pc}
;
; CHECK-BE-LABEL: vector_add_f16:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .save {r4, r5, r7, lr}
; CHECK-BE-NEXT:    push {r4, r5, r7, lr}
; CHECK-BE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
; CHECK-BE-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
; CHECK-BE-NEXT:    vmov d0, r1, r0
; CHECK-BE-NEXT:    add r0, sp, #64
; CHECK-BE-NEXT:    vldrh.u16 q6, [r0]
; CHECK-BE-NEXT:    vmov d1, r3, r2
; CHECK-BE-NEXT:    vrev64.16 q4, q0
; CHECK-BE-NEXT:    vmov.u16 r0, q6[0]
; CHECK-BE-NEXT:    vmov.u16 r4, q4[0]
; CHECK-BE-NEXT:    bl __aeabi_h2f
; CHECK-BE-NEXT:    mov r5, r0
; CHECK-BE-NEXT:    mov r0, r4
; CHECK-BE-NEXT:    bl __aeabi_h2f
; CHECK-BE-NEXT:    mov r1, r5
; CHECK-BE-NEXT:    bl __aeabi_fadd
; CHECK-BE-NEXT:    bl __aeabi_f2h
; CHECK-BE-NEXT:    vmov.16 q5[0], r0
; CHECK-BE-NEXT:    vmov.u16 r0, q6[1]
; CHECK-BE-NEXT:    vmov.u16 r4, q4[1]
; CHECK-BE-NEXT:    bl __aeabi_h2f
; CHECK-BE-NEXT:    mov r5, r0
; CHECK-BE-NEXT:    mov r0, r4
; CHECK-BE-NEXT:    bl __aeabi_h2f
; CHECK-BE-NEXT:    mov r1, r5
; CHECK-BE-NEXT:    bl __aeabi_fadd
; CHECK-BE-NEXT:    bl __aeabi_f2h
; CHECK-BE-NEXT:    vmov.16 q5[1], r0
; CHECK-BE-NEXT:    vmov.u16 r0, q6[2]
; CHECK-BE-NEXT:    vmov.u16 r4, q4[2]
; CHECK-BE-NEXT:    bl __aeabi_h2f
; CHECK-BE-NEXT:    mov r5, r0
; CHECK-BE-NEXT:    mov r0, r4
; CHECK-BE-NEXT:    bl __aeabi_h2f
; CHECK-BE-NEXT:    mov r1, r5
; CHECK-BE-NEXT:    bl __aeabi_fadd
; CHECK-BE-NEXT:    bl __aeabi_f2h
; CHECK-BE-NEXT:    vmov.16 q5[2], r0
; CHECK-BE-NEXT:    vmov.u16 r0, q6[3]
; CHECK-BE-NEXT:    vmov.u16 r4, q4[3]
; CHECK-BE-NEXT:    bl __aeabi_h2f
; CHECK-BE-NEXT:    mov r5, r0
; CHECK-BE-NEXT:    mov r0, r4
; CHECK-BE-NEXT:    bl __aeabi_h2f
; CHECK-BE-NEXT:    mov r1, r5
; CHECK-BE-NEXT:    bl __aeabi_fadd
; CHECK-BE-NEXT:    bl __aeabi_f2h
; CHECK-BE-NEXT:    vmov.16 q5[3], r0
; CHECK-BE-NEXT:    vmov.u16 r0, q6[4]
; CHECK-BE-NEXT:    vmov.u16 r4, q4[4]
; CHECK-BE-NEXT:    bl __aeabi_h2f
; CHECK-BE-NEXT:    mov r5, r0
; CHECK-BE-NEXT:    mov r0, r4
; CHECK-BE-NEXT:    bl __aeabi_h2f
; CHECK-BE-NEXT:    mov r1, r5
; CHECK-BE-NEXT:    bl __aeabi_fadd
; CHECK-BE-NEXT:    bl __aeabi_f2h
; CHECK-BE-NEXT:    vmov.16 q5[4], r0
; CHECK-BE-NEXT:    vmov.u16 r0, q6[5]
; CHECK-BE-NEXT:    vmov.u16 r4, q4[5]
; CHECK-BE-NEXT:    bl __aeabi_h2f
; CHECK-BE-NEXT:    mov r5, r0
; CHECK-BE-NEXT:    mov r0, r4
; CHECK-BE-NEXT:    bl __aeabi_h2f
; CHECK-BE-NEXT:    mov r1, r5
; CHECK-BE-NEXT:    bl __aeabi_fadd
; CHECK-BE-NEXT:    bl __aeabi_f2h
; CHECK-BE-NEXT:    vmov.16 q5[5], r0
; CHECK-BE-NEXT:    vmov.u16 r0, q6[6]
; CHECK-BE-NEXT:    vmov.u16 r4, q4[6]
; CHECK-BE-NEXT:    bl __aeabi_h2f
; CHECK-BE-NEXT:    mov r5, r0
; CHECK-BE-NEXT:    mov r0, r4
; CHECK-BE-NEXT:    bl __aeabi_h2f
; CHECK-BE-NEXT:    mov r1, r5
; CHECK-BE-NEXT:    bl __aeabi_fadd
; CHECK-BE-NEXT:    bl __aeabi_f2h
; CHECK-BE-NEXT:    vmov.16 q5[6], r0
; CHECK-BE-NEXT:    vmov.u16 r0, q6[7]
; CHECK-BE-NEXT:    vmov.u16 r4, q4[7]
; CHECK-BE-NEXT:    bl __aeabi_h2f
; CHECK-BE-NEXT:    mov r5, r0
; CHECK-BE-NEXT:    mov r0, r4
; CHECK-BE-NEXT:    bl __aeabi_h2f
; CHECK-BE-NEXT:    mov r1, r5
; CHECK-BE-NEXT:    bl __aeabi_fadd
; CHECK-BE-NEXT:    bl __aeabi_f2h
; CHECK-BE-NEXT:    vmov.16 q5[7], r0
; CHECK-BE-NEXT:    vrev64.16 q0, q5
; CHECK-BE-NEXT:    vmov r1, r0, d0
; CHECK-BE-NEXT:    vmov r3, r2, d1
; CHECK-BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
; CHECK-BE-NEXT:    pop {r4, r5, r7, pc}
;
; CHECK-FP-LABEL: vector_add_f16:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vmov d0, r0, r1
; CHECK-FP-NEXT:    mov r0, sp
; CHECK-FP-NEXT:    vldrw.u32 q1, [r0]
; CHECK-FP-NEXT:    vmov d1, r2, r3
; CHECK-FP-NEXT:    vadd.f16 q0, q0, q1
; CHECK-FP-NEXT:    vmov r0, r1, d0
; CHECK-FP-NEXT:    vmov r2, r3, d1
; CHECK-FP-NEXT:    bx lr
entry:
  %sum = fadd <8 x half> %lhs, %rhs
  ret <8 x half> %sum
}

; <4 x float> fadd: with +mve only, the expected output makes four
; __aeabi_fadd calls (one per lane); with +mve.fp it is a single vadd.f32.
define <4 x float> @vector_add_f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK-MVE-LABEL: vector_add_f32:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    .save {r4, r5, r6, r7, lr}
; CHECK-MVE-NEXT:    push {r4, r5, r6, r7, lr}
; CHECK-MVE-NEXT:    .pad #4
; CHECK-MVE-NEXT:    sub sp, #4
; CHECK-MVE-NEXT:    .vsave {d8, d9}
; CHECK-MVE-NEXT:    vpush {d8, d9}
; CHECK-MVE-NEXT:    mov r4, r0
; CHECK-MVE-NEXT:    add r0, sp, #40
; CHECK-MVE-NEXT:    vldrw.u32 q4, [r0]
; CHECK-MVE-NEXT:    mov r6, r1
; CHECK-MVE-NEXT:    mov r0, r3
; CHECK-MVE-NEXT:    mov r5, r2
; CHECK-MVE-NEXT:    vmov r7, r1, d9
; CHECK-MVE-NEXT:    bl __aeabi_fadd
; CHECK-MVE-NEXT:    vmov s19, r0
; CHECK-MVE-NEXT:    mov r0, r5
; CHECK-MVE-NEXT:    mov r1, r7
; CHECK-MVE-NEXT:    bl __aeabi_fadd
; CHECK-MVE-NEXT:    vmov r5, r1, d8
; CHECK-MVE-NEXT:    vmov s18, r0
; CHECK-MVE-NEXT:    mov r0, r6
; CHECK-MVE-NEXT:    bl __aeabi_fadd
; CHECK-MVE-NEXT:    vmov s17, r0
; CHECK-MVE-NEXT:    mov r0, r4
; CHECK-MVE-NEXT:    mov r1, r5
; CHECK-MVE-NEXT:    bl __aeabi_fadd
; CHECK-MVE-NEXT:    vmov s16, r0
; CHECK-MVE-NEXT:    vmov r2, r3, d9
; CHECK-MVE-NEXT:    vmov r0, r1, d8
; CHECK-MVE-NEXT:    vpop {d8, d9}
; CHECK-MVE-NEXT:    add sp, #4
; CHECK-MVE-NEXT:    pop {r4, r5, r6, r7, pc}
;
; CHECK-BE-LABEL: vector_add_f32:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .save {r4, r5, r7, lr}
; CHECK-BE-NEXT:    push {r4, r5, r7, lr}
; CHECK-BE-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-BE-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-BE-NEXT:    vmov d0, r1, r0
; CHECK-BE-NEXT:    add r1, sp, #48
; CHECK-BE-NEXT:    vldrw.u32 q5, [r1]
; CHECK-BE-NEXT:    vmov d1, r3, r2
; CHECK-BE-NEXT:    vrev64.32 q4, q0
; CHECK-BE-NEXT:    vmov r4, r0, d9
; CHECK-BE-NEXT:    vmov r5, r1, d11
; CHECK-BE-NEXT:    bl __aeabi_fadd
; CHECK-BE-NEXT:    vmov s19, r0
; CHECK-BE-NEXT:    mov r0, r4
; CHECK-BE-NEXT:    mov r1, r5
; CHECK-BE-NEXT:    bl __aeabi_fadd
; CHECK-BE-NEXT:    vmov s18, r0
; CHECK-BE-NEXT:    vmov r4, r0, d8
; CHECK-BE-NEXT:    vmov r5, r1, d10
; CHECK-BE-NEXT:    bl __aeabi_fadd
; CHECK-BE-NEXT:    vmov s17, r0
; CHECK-BE-NEXT:    mov r0, r4
; CHECK-BE-NEXT:    mov r1, r5
; CHECK-BE-NEXT:    bl __aeabi_fadd
; CHECK-BE-NEXT:    vmov s16, r0
; CHECK-BE-NEXT:    vrev64.32 q0, q4
; CHECK-BE-NEXT:    vmov r1, r0, d0
; CHECK-BE-NEXT:    vmov r3, r2, d1
; CHECK-BE-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-BE-NEXT:    pop {r4, r5, r7, pc}
;
; CHECK-FP-LABEL: vector_add_f32:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    vmov d0, r0, r1
; CHECK-FP-NEXT:    mov r0, sp
; CHECK-FP-NEXT:    vldrw.u32 q1, [r0]
; CHECK-FP-NEXT:    vmov d1, r2, r3
; CHECK-FP-NEXT:    vadd.f32 q0, q0, q1
; CHECK-FP-NEXT:    vmov r0, r1, d0
; CHECK-FP-NEXT:    vmov r2, r3, d1
; CHECK-FP-NEXT:    bx lr
entry:
  %sum = fadd <4 x float> %lhs, %rhs
  ret <4 x float> %sum
}

; <2 x double> fadd: the expected output makes two __aeabi_dadd calls in all
; three configurations, including +mve.fp.
define <2 x double> @vector_add_f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK-MVE-LABEL: vector_add_f64:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    .save {r4, r5, r6, r7, lr}
; CHECK-MVE-NEXT:    push {r4, r5, r6, r7, lr}
; CHECK-MVE-NEXT:    .pad #4
; CHECK-MVE-NEXT:    sub sp, #4
; CHECK-MVE-NEXT:    .vsave {d8, d9}
; CHECK-MVE-NEXT:    vpush {d8, d9}
; CHECK-MVE-NEXT:    mov r5, r0
; CHECK-MVE-NEXT:    add r0, sp, #40
; CHECK-MVE-NEXT:    vldrw.u32 q4, [r0]
; CHECK-MVE-NEXT:    mov r4, r2
; CHECK-MVE-NEXT:    mov r6, r3
; CHECK-MVE-NEXT:    mov r7, r1
; CHECK-MVE-NEXT:    vmov r2, r3, d9
; CHECK-MVE-NEXT:    mov r0, r4
; CHECK-MVE-NEXT:    mov r1, r6
; CHECK-MVE-NEXT:    bl __aeabi_dadd
; CHECK-MVE-NEXT:    vmov r2, r3, d8
; CHECK-MVE-NEXT:    mov r4, r0
; CHECK-MVE-NEXT:    mov r6, r1
; CHECK-MVE-NEXT:    mov r0, r5
; CHECK-MVE-NEXT:    mov r1, r7
; CHECK-MVE-NEXT:    bl __aeabi_dadd
; CHECK-MVE-NEXT:    mov r2, r4
; CHECK-MVE-NEXT:    mov r3, r6
; CHECK-MVE-NEXT:    vpop {d8, d9}
; CHECK-MVE-NEXT:    add sp, #4
; CHECK-MVE-NEXT:    pop {r4, r5, r6, r7, pc}
;
; CHECK-BE-LABEL: vector_add_f64:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .save {r4, r5, r6, r7, lr}
; CHECK-BE-NEXT:    push {r4, r5, r6, r7, lr}
; CHECK-BE-NEXT:    .pad #4
; CHECK-BE-NEXT:    sub sp, #4
; CHECK-BE-NEXT:    .vsave {d8, d9}
; CHECK-BE-NEXT:    vpush {d8, d9}
; CHECK-BE-NEXT:    mov r5, r0
; CHECK-BE-NEXT:    add r0, sp, #40
; CHECK-BE-NEXT:    vldrb.u8 q0, [r0]
; CHECK-BE-NEXT:    mov r6, r2
; CHECK-BE-NEXT:    mov r4, r3
; CHECK-BE-NEXT:    mov r7, r1
; CHECK-BE-NEXT:    vrev64.8 q4, q0
; CHECK-BE-NEXT:    mov r0, r6
; CHECK-BE-NEXT:    vmov r3, r2, d9
; CHECK-BE-NEXT:    mov r1, r4
; CHECK-BE-NEXT:    bl __aeabi_dadd
; CHECK-BE-NEXT:    vmov r3, r2, d8
; CHECK-BE-NEXT:    mov r4, r0
; CHECK-BE-NEXT:    mov r6, r1
; CHECK-BE-NEXT:    mov r0, r5
; CHECK-BE-NEXT:    mov r1, r7
; CHECK-BE-NEXT:    bl __aeabi_dadd
; CHECK-BE-NEXT:    mov r2, r4
; CHECK-BE-NEXT:    mov r3, r6
; CHECK-BE-NEXT:    vpop {d8, d9}
; CHECK-BE-NEXT:    add sp, #4
; CHECK-BE-NEXT:    pop {r4, r5, r6, r7, pc}
;
; CHECK-FP-LABEL: vector_add_f64:
; CHECK-FP:       @ %bb.0: @ %entry
; CHECK-FP-NEXT:    .save {r4, r5, r6, r7, lr}
; CHECK-FP-NEXT:    push {r4, r5, r6, r7, lr}
; CHECK-FP-NEXT:    .pad #4
; CHECK-FP-NEXT:    sub sp, #4
; CHECK-FP-NEXT:    .vsave {d8, d9}
; CHECK-FP-NEXT:    vpush {d8, d9}
; CHECK-FP-NEXT:    mov r5, r2
; CHECK-FP-NEXT:    add r2, sp, #40
; CHECK-FP-NEXT:    vldrw.u32 q4, [r2]
; CHECK-FP-NEXT:    mov r4, r3
; CHECK-FP-NEXT:    vmov r2, r3, d8
; CHECK-FP-NEXT:    bl __aeabi_dadd
; CHECK-FP-NEXT:    vmov r2, r3, d9
; CHECK-FP-NEXT:    mov r6, r0
; CHECK-FP-NEXT:    mov r7, r1
; CHECK-FP-NEXT:    mov r0, r5
; CHECK-FP-NEXT:    mov r1, r4
; CHECK-FP-NEXT:    bl __aeabi_dadd
; CHECK-FP-NEXT:    mov r2, r0
; CHECK-FP-NEXT:    mov r3, r1
; CHECK-FP-NEXT:    mov r0, r6
; CHECK-FP-NEXT:    mov r1, r7
; CHECK-FP-NEXT:    vpop {d8, d9}
; CHECK-FP-NEXT:    add sp, #4
; CHECK-FP-NEXT:    pop {r4, r5, r6, r7, pc}
entry:
  %sum = fadd <2 x double> %lhs, %rhs
  ret <2 x double> %sum
}

; Builds <x, x, x, y> with four insertelement instructions. The expected
; output is only core-register moves, identical for both endiannesses.
define <4 x i32> @insertextract(i32 %x, i32 %y) {
; CHECK-LE-LABEL: insertextract:
; CHECK-LE:       @ %bb.0:
; CHECK-LE-NEXT:    mov r3, r1
; CHECK-LE-NEXT:    mov r1, r0
; CHECK-LE-NEXT:    mov r2, r0
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: insertextract:
; CHECK-BE:       @ %bb.0:
; CHECK-BE-NEXT:    mov r3, r1
; CHECK-BE-NEXT:    mov r1, r0
; CHECK-BE-NEXT:    mov r2, r0
; CHECK-BE-NEXT:    bx lr
  %1 = insertelement <4 x i32> undef, i32 %x, i32 0
  %2 = insertelement <4 x i32> %1, i32 %x, i32 1
  %3 = insertelement <4 x i32> %2, i32 %x, i32 2
  %4 = insertelement <4 x i32> %3, i32 %y, i32 3
  ret <4 x i32> %4
}

; External callee used by @main. Its parameter list matches the
; (i32, <4 x i32>) argument list passed at both call sites below.
declare void @print_uint32x4_t(i32, <4 x i32> %val)

; Builds a <2 x i64> from %x and %y, bitcasts it to <4 x i32>, and passes
; first the bitcast value and then a copy with lane 0 replaced by 1 to
; print_uint32x4_t. In the expected output the leading i32 0 is in r0 and the
; vector is split between r2/r3 and an 8-byte stack slot.
define i32 @main(i64 %x, i64 %y) {
; CHECK-LE-LABEL: main:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .save {r4, lr}
; CHECK-LE-NEXT:    push {r4, lr}
; CHECK-LE-NEXT:    .vsave {d8, d9}
; CHECK-LE-NEXT:    vpush {d8, d9}
; CHECK-LE-NEXT:    .pad #8
; CHECK-LE-NEXT:    sub sp, #8
; CHECK-LE-NEXT:    vmov.32 q4[2], r2
; CHECK-LE-NEXT:    mov r4, r1
; CHECK-LE-NEXT:    mov r1, r0
; CHECK-LE-NEXT:    vmov.32 q4[3], r3
; CHECK-LE-NEXT:    movs r0, #0
; CHECK-LE-NEXT:    mov r2, r1
; CHECK-LE-NEXT:    mov r3, r4
; CHECK-LE-NEXT:    vstr d9, [sp]
; CHECK-LE-NEXT:    bl print_uint32x4_t
; CHECK-LE-NEXT:    movs r0, #0
; CHECK-LE-NEXT:    movs r2, #1
; CHECK-LE-NEXT:    mov r3, r4
; CHECK-LE-NEXT:    vstr d9, [sp]
; CHECK-LE-NEXT:    bl print_uint32x4_t
; CHECK-LE-NEXT:    movs r0, #0
; CHECK-LE-NEXT:    add sp, #8
; CHECK-LE-NEXT:    vpop {d8, d9}
; CHECK-LE-NEXT:    pop {r4, pc}
;
; CHECK-BE-LABEL: main:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .save {r4, lr}
; CHECK-BE-NEXT:    push {r4, lr}
; CHECK-BE-NEXT:    .vsave {d8, d9}
; CHECK-BE-NEXT:    vpush {d8, d9}
; CHECK-BE-NEXT:    .pad #8
; CHECK-BE-NEXT:    sub sp, #8
; CHECK-BE-NEXT:    vmov.32 q0[2], r2
; CHECK-BE-NEXT:    mov r4, r1
; CHECK-BE-NEXT:    mov r1, r0
; CHECK-BE-NEXT:    vmov.32 q0[3], r3
; CHECK-BE-NEXT:    vrev64.32 q4, q0
; CHECK-BE-NEXT:    movs r0, #0
; CHECK-BE-NEXT:    mov r2, r1
; CHECK-BE-NEXT:    mov r3, r4
; CHECK-BE-NEXT:    vstr d9, [sp]
; CHECK-BE-NEXT:    bl print_uint32x4_t
; CHECK-BE-NEXT:    movs r0, #0
; CHECK-BE-NEXT:    movs r2, #1
; CHECK-BE-NEXT:    mov r3, r4
; CHECK-BE-NEXT:    vstr d9, [sp]
; CHECK-BE-NEXT:    bl print_uint32x4_t
; CHECK-BE-NEXT:    movs r0, #0
; CHECK-BE-NEXT:    add sp, #8
; CHECK-BE-NEXT:    vpop {d8, d9}
; CHECK-BE-NEXT:    pop {r4, pc}
entry:
  %a = insertelement <2 x i64> poison, i64 %x, i64 0
  %b = insertelement <2 x i64> %a, i64 %y, i64 1
  %c = bitcast <2 x i64> %b to <4 x i32>
  %i = insertelement <4 x i32> %c, i32 1, i64 0
  tail call void @print_uint32x4_t(i32 0, <4 x i32> %c)
  tail call void @print_uint32x4_t(i32 0, <4 x i32> %i)
  ret i32 0
}