; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=arm64-apple-ios -mattr=+sve -o - %s | FileCheck %s
; RUN: llc -mtriple=aarch64_be-unknown-linux -mattr=+sve -o - %s | FileCheck --check-prefix=CHECK-BE %s
; RUN: llc -mtriple=arm64-apple-ios -mattr=+global-isel -mattr=+sve -o - %s | FileCheck %s
; RUN: llc -mtriple=aarch64_be-unknown-linux -mattr=+global-isel -mattr=+sve -o - %s | FileCheck --check-prefix=CHECK-BE %s

; Codegen test for shuffles that interleave the low lanes of a widened
; <2 x i32> value with either zeros ("zext") or unspecified lanes ("aext").
; Each pattern is exercised twice: once where the widened vector has a single
; use, and once ("_extrause") where it is additionally stored to memory.
; Little-endian (arm64-apple-ios) and big-endian (aarch64_be) outputs are
; checked under separate prefixes.
;
; REVIEW: the last two llc invocations pass -mattr=+global-isel, which is a
; subtarget-feature string, while llc's GlobalISel switch is the -global-isel
; option; they also share check prefixes with the SelectionDAG invocations.
; Presumably -global-isel was intended -- TODO confirm, and if so give those
; invocations their own prefixes and regenerate the assertions.

; The first shuffle widens the <2 x i32> sum to <4 x i32> with the two upper
; lanes unspecified. The second shuffle picks lanes 0 and 1 of that vector and
; lanes 1 and 3 of an all-zero vector (mask indices 5 and 7), producing
; <%i0[0], 0, %i0[1], 0>. The result is added to the vector loaded from %c and
; stored back to %c; %d is unused. The little-endian assertions expect the
; interleave to become a single ushll, the big-endian ones a zip1 followed by
; a trn2 against a zeroed register.
define void @zext_of_concat(ptr %a, ptr %b, ptr %c, ptr %d) nounwind {
; CHECK-LABEL: zext_of_concat:
; CHECK: ; %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: add.2s v0, v0, v1
; CHECK-NEXT: ldr q1, [x2]
; CHECK-NEXT: ushll.2d v0, v0, #0
; CHECK-NEXT: add.4s v0, v0, v1
; CHECK-NEXT: str q0, [x2]
; CHECK-NEXT: ret
;
; CHECK-BE-LABEL: zext_of_concat:
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: ld1 { v0.2s }, [x0]
; CHECK-BE-NEXT: ld1 { v1.2s }, [x1]
; CHECK-BE-NEXT: add v0.2s, v0.2s, v1.2s
; CHECK-BE-NEXT: movi v1.2d, #0000000000000000
; CHECK-BE-NEXT: zip1 v0.4s, v0.4s, v0.4s
; CHECK-BE-NEXT: trn2 v0.4s, v0.4s, v1.4s
; CHECK-BE-NEXT: ld1 { v1.4s }, [x2]
; CHECK-BE-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-BE-NEXT: st1 { v0.4s }, [x2]
; CHECK-BE-NEXT: ret
  %i0.a = load <2 x i32>, ptr %a
  %i0.b = load <2 x i32>, ptr %b
  %i0 = add <2 x i32> %i0.a, %i0.b
  %i1 = shufflevector <2 x i32> %i0, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  %i2 = shufflevector <4 x i32> %i1, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
  %i3 = load <4 x i32>, ptr %c
  %i4 = add <4 x i32> %i2, %i3
  store <4 x i32> %i4, ptr %c
  ret void
}

; Same interleave-with-zero shuffle as @zext_of_concat, but the widened vector
; is a concatenation of %i0 with itself (mask <0, 1, 2, 3> over two copies of
; %i0) and is also stored to %e, giving it a second use. The assertions expect
; an explicit zero register plus zip1 (and trn2 on big-endian) rather than the
; single ushll seen in @zext_of_concat.
define void @zext_of_concat_extrause(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) nounwind {
; CHECK-LABEL: zext_of_concat_extrause:
; CHECK: ; %bb.0:
; CHECK-NEXT: ldr d0, [x1]
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: add.2s v0, v1, v0
; CHECK-NEXT: movi.2d v1, #0000000000000000
; CHECK-NEXT: mov.d v0[1], v0[0]
; CHECK-NEXT: zip1.4s v1, v0, v1
; CHECK-NEXT: str q0, [x4]
; CHECK-NEXT: ldr q0, [x2]
; CHECK-NEXT: add.4s v0, v1, v0
; CHECK-NEXT: str q0, [x2]
; CHECK-NEXT: ret
;
; CHECK-BE-LABEL: zext_of_concat_extrause:
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: ld1 { v0.2s }, [x1]
; CHECK-BE-NEXT: ld1 { v1.2s }, [x0]
; CHECK-BE-NEXT: movi v2.2d, #0000000000000000
; CHECK-BE-NEXT: add v0.2s, v1.2s, v0.2s
; CHECK-BE-NEXT: mov v0.d[1], v0.d[0]
; CHECK-BE-NEXT: zip1 v1.4s, v0.4s, v0.4s
; CHECK-BE-NEXT: st1 { v0.4s }, [x4]
; CHECK-BE-NEXT: trn2 v0.4s, v1.4s, v2.4s
; CHECK-BE-NEXT: ld1 { v1.4s }, [x2]
; CHECK-BE-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-BE-NEXT: st1 { v0.4s }, [x2]
; CHECK-BE-NEXT: ret
  %i0.a = load <2 x i32>, ptr %a
  %i0.b = load <2 x i32>, ptr %b
  %i0 = add <2 x i32> %i0.a, %i0.b
  %i1 = shufflevector <2 x i32> %i0, <2 x i32> %i0, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  store <4 x i32> %i1, ptr %e
  %i2 = shufflevector <4 x i32> %i1, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
  %i3 = load <4 x i32>, ptr %c
  %i4 = add <4 x i32> %i2, %i3
  store <4 x i32> %i4, ptr %c
  ret void
}

; Like @zext_of_concat, except the odd result lanes are left unspecified
; (poison mask elements) instead of being taken from a zero vector, so the
; second shuffle yields <%i0[0], unspecified, %i0[1], unspecified>. The
; little-endian assertions still expect a ushll; the big-endian ones expect a
; lone zip1 with no zero register. %d is unused.
define void @aext_of_concat(ptr %a, ptr %b, ptr %c, ptr %d) nounwind {
; CHECK-LABEL: aext_of_concat:
; CHECK: ; %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: add.2s v0, v0, v1
; CHECK-NEXT: ldr q1, [x2]
; CHECK-NEXT: ushll.2d v0, v0, #0
; CHECK-NEXT: add.4s v0, v0, v1
; CHECK-NEXT: str q0, [x2]
; CHECK-NEXT: ret
;
; CHECK-BE-LABEL: aext_of_concat:
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: ld1 { v0.2s }, [x0]
; CHECK-BE-NEXT: ld1 { v1.2s }, [x1]
; CHECK-BE-NEXT: add v0.2s, v0.2s, v1.2s
; CHECK-BE-NEXT: ld1 { v1.4s }, [x2]
; CHECK-BE-NEXT: zip1 v0.4s, v0.4s, v0.4s
; CHECK-BE-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-BE-NEXT: st1 { v0.4s }, [x2]
; CHECK-BE-NEXT: ret
  %i0.a = load <2 x i32>, ptr %a
  %i0.b = load <2 x i32>, ptr %b
  %i0 = add <2 x i32> %i0.a, %i0.b
  %i1 = shufflevector <2 x i32> %i0, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  %i2 = shufflevector <4 x i32> %i1, <4 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 poison>
  %i3 = load <4 x i32>, ptr %c
  %i4 = add <4 x i32> %i2, %i3
  store <4 x i32> %i4, ptr %c
  ret void
}

; Extra-use variant of @aext_of_concat: the widened vector is a concatenation
; of %i0 with itself and is also stored to %e. The second shuffle's other
; operand is zeroinitializer, but its mask only names lanes 0 and 1 of the
; first operand plus two unspecified lanes, so no zero lane is ever selected.
; The assertions expect a zip1 for the interleave on both endiannesses.
define void @aext_of_concat_extrause(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) nounwind {
; CHECK-LABEL: aext_of_concat_extrause:
; CHECK: ; %bb.0:
; CHECK-NEXT: ldr d0, [x1]
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: add.2s v0, v1, v0
; CHECK-NEXT: mov.16b v1, v0
; CHECK-NEXT: mov.d v1[1], v0[0]
; CHECK-NEXT: zip1.4s v0, v0, v0
; CHECK-NEXT: str q1, [x4]
; CHECK-NEXT: ldr q1, [x2]
; CHECK-NEXT: add.4s v0, v0, v1
; CHECK-NEXT: str q0, [x2]
; CHECK-NEXT: ret
;
; CHECK-BE-LABEL: aext_of_concat_extrause:
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: ld1 { v0.2s }, [x1]
; CHECK-BE-NEXT: ld1 { v1.2s }, [x0]
; CHECK-BE-NEXT: add v0.2s, v1.2s, v0.2s
; CHECK-BE-NEXT: mov v1.16b, v0.16b
; CHECK-BE-NEXT: mov v1.d[1], v0.d[0]
; CHECK-BE-NEXT: zip1 v0.4s, v0.4s, v0.4s
; CHECK-BE-NEXT: st1 { v1.4s }, [x4]
; CHECK-BE-NEXT: ld1 { v1.4s }, [x2]
; CHECK-BE-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-BE-NEXT: st1 { v0.4s }, [x2]
; CHECK-BE-NEXT: ret
  %i0.a = load <2 x i32>, ptr %a
  %i0.b = load <2 x i32>, ptr %b
  %i0 = add <2 x i32> %i0.a, %i0.b
  %i1 = shufflevector <2 x i32> %i0, <2 x i32> %i0, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  store <4 x i32> %i1, ptr %e
  %i2 = shufflevector <4 x i32> %i1, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 poison, i32 1, i32 poison>
  %i3 = load <4 x i32>, ptr %c
  %i4 = add <4 x i32> %i2, %i3
  store <4 x i32> %i4, ptr %c
  ret void
}