; RUN: opt < %s -interleaved-access -S | FileCheck %s -check-prefix=NEON
; RUN: opt < %s -mattr=-neon -interleaved-access -S | FileCheck %s -check-prefix=NO_NEON
; RUN: opt < %s -passes=interleaved-access -S | FileCheck %s -check-prefix=NEON
; RUN: opt < %s -mattr=-neon -passes=interleaved-access -S | FileCheck %s -check-prefix=NO_NEON

; Exercises the interleaved-access pass on AArch64 under both the legacy and
; the new pass-manager spelling. With NEON enabled the checks expect
; de-interleaving loads / interleaving stores to become ldN / stN intrinsic
; calls; with -mattr=-neon they expect no @llvm.aarch64.neon call at all.

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"

; Factor-2 load of <16 x i8>; with NEON a single ld2.v8i8 call is expected.
define void @load_factor2(ptr %ptr) {
; NEON-LABEL: @load_factor2(
; NEON-NEXT: [[LDN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr %ptr)
; NEON-NEXT: [[TMP2:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[LDN]], 1
; NEON-NEXT: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[LDN]], 0
; NEON-NEXT: ret void
; NO_NEON-LABEL: @load_factor2(
; NO_NEON-NOT: @llvm.aarch64.neon
; NO_NEON: ret void
;
  %interleaved.vec = load <16 x i8>, ptr %ptr, align 4
  %v0 = shufflevector <16 x i8> %interleaved.vec, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %v1 = shufflevector <16 x i8> %interleaved.vec, <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  ret void
}

; Factor-3 load of <12 x i32>; with NEON a single ld3.v4i32 call is expected.
define void @load_factor3(ptr %ptr) {
; NEON-LABEL: @load_factor3(
; NEON-NEXT: [[LDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr %ptr)
; NEON-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 2
; NEON-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 1
; NEON-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 0
; NEON-NEXT: ret void
; NO_NEON-LABEL: @load_factor3(
; NO_NEON-NOT: @llvm.aarch64.neon
; NO_NEON: ret void
;
  %interleaved.vec = load <12 x i32>, ptr %ptr, align 4
  %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
  %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
  %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
  ret void
}

; Factor-4 load of <16 x i32>; with NEON a single ld4.v4i32 call is expected.
define void @load_factor4(ptr %ptr) {
; NEON-LABEL: @load_factor4(
; NEON-NEXT: [[LDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0(ptr %ptr)
; NEON-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 3
; NEON-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 2
; NEON-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 1
; NEON-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 0
; NEON-NEXT: ret void
; NO_NEON-LABEL: @load_factor4(
; NO_NEON-NOT: @llvm.aarch64.neon
; NO_NEON: ret void
;
  %interleaved.vec = load <16 x i32>, ptr %ptr, align 4
  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
  %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
  %v3 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
  ret void
}

; Factor-2 store of two <8 x i8> vectors; with NEON a st2.v8i8 call is expected.
define void @store_factor2(ptr %ptr, <8 x i8> %v0, <8 x i8> %v1) {
; NEON-LABEL: @store_factor2(
; NEON-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; NEON-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; NEON-NEXT: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]], ptr %ptr)
; NEON-NEXT: ret void
; NO_NEON-LABEL: @store_factor2(
; NO_NEON-NOT: @llvm.aarch64.neon
; NO_NEON: ret void
;
  %interleaved.vec = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  store <16 x i8> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; Factor-3 store of three <4 x i32> vectors (pre-concatenated into %s0/%s1);
; with NEON a st3.v4i32 call is expected.
define void @store_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
; NEON-LABEL: @store_factor3(
; NEON: [[TMP1:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; NEON-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; NEON-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
; NEON-NEXT: call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], ptr %ptr)
; NEON-NEXT: ret void
; NO_NEON-LABEL: @store_factor3(
; NO_NEON-NOT: @llvm.aarch64.neon
; NO_NEON: ret void
;
  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s1 = shufflevector <4 x i32> %v2, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; Factor-4 store of four <4 x i32> vectors (pre-concatenated into %s0/%s1);
; with NEON a st4.v4i32 call is expected.
define void @store_factor4(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
; NEON-LABEL: @store_factor4(
; NEON: [[TMP1:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; NEON-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; NEON-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
; NEON-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
; NEON-NEXT: call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], ptr %ptr)
; NEON-NEXT: ret void
; NO_NEON-LABEL: @store_factor4(
; NO_NEON-NOT: @llvm.aarch64.neon
; NO_NEON: ret void
;
  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
  store <16 x i32> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; Factor-2 load of a pointer vector; the checks expect ld2.v2i64 with each
; extracted result converted back through inttoptr.
define void @load_ptrvec_factor2(ptr %ptr) {
; NEON-LABEL: @load_ptrvec_factor2(
; NEON-NEXT: [[LDN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr %ptr)
; NEON-NEXT: [[TMP2:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[LDN]], 1
; NEON-NEXT: [[TMP3:%.*]] = inttoptr <2 x i64> [[TMP2]] to <2 x ptr>
; NEON-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[LDN]], 0
; NEON-NEXT: [[TMP5:%.*]] = inttoptr <2 x i64> [[TMP4]] to <2 x ptr>
; NEON-NEXT: ret void
; NO_NEON-LABEL: @load_ptrvec_factor2(
; NO_NEON-NOT: @llvm.aarch64.neon
; NO_NEON: ret void
;
  %interleaved.vec = load <4 x ptr>, ptr %ptr, align 4
  %v0 = shufflevector <4 x ptr> %interleaved.vec, <4 x ptr> poison, <2 x i32> <i32 0, i32 2>
  %v1 = shufflevector <4 x ptr> %interleaved.vec, <4 x ptr> poison, <2 x i32> <i32 1, i32 3>
  ret void
}

; Factor-3 load of a pointer vector; the checks expect ld3.v2i64 plus inttoptr.
define void @load_ptrvec_factor3(ptr %ptr) {
; NEON-LABEL: @load_ptrvec_factor3(
; NEON-NEXT: [[LDN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr %ptr)
; NEON-NEXT: [[TMP2:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 2
; NEON-NEXT: [[TMP3:%.*]] = inttoptr <2 x i64> [[TMP2]] to <2 x ptr>
; NEON-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 1
; NEON-NEXT: [[TMP5:%.*]] = inttoptr <2 x i64> [[TMP4]] to <2 x ptr>
; NEON-NEXT: [[TMP6:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 0
; NEON-NEXT: [[TMP7:%.*]] = inttoptr <2 x i64> [[TMP6]] to <2 x ptr>
; NEON-NEXT: ret void
; NO_NEON-LABEL: @load_ptrvec_factor3(
; NO_NEON-NOT: @llvm.aarch64.neon
; NO_NEON: ret void
;
  %interleaved.vec = load <6 x ptr>, ptr %ptr, align 4
  %v0 = shufflevector <6 x ptr> %interleaved.vec, <6 x ptr> poison, <2 x i32> <i32 0, i32 3>
  %v1 = shufflevector <6 x ptr> %interleaved.vec, <6 x ptr> poison, <2 x i32> <i32 1, i32 4>
  %v2 = shufflevector <6 x ptr> %interleaved.vec, <6 x ptr> poison, <2 x i32> <i32 2, i32 5>
  ret void
}

; Factor-4 load of a pointer vector; the checks expect ld4.v2i64 plus inttoptr.
define void @load_ptrvec_factor4(ptr %ptr) {
; NEON-LABEL: @load_ptrvec_factor4(
; NEON-NEXT: [[LDN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0(ptr %ptr)
; NEON-NEXT: [[TMP2:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 3
; NEON-NEXT: [[TMP3:%.*]] = inttoptr <2 x i64> [[TMP2]] to <2 x ptr>
; NEON-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 2
; NEON-NEXT: [[TMP5:%.*]] = inttoptr <2 x i64> [[TMP4]] to <2 x ptr>
; NEON-NEXT: [[TMP6:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 1
; NEON-NEXT: [[TMP7:%.*]] = inttoptr <2 x i64> [[TMP6]] to <2 x ptr>
; NEON-NEXT: [[TMP8:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 0
; NEON-NEXT: [[TMP9:%.*]] = inttoptr <2 x i64> [[TMP8]] to <2 x ptr>
; NEON-NEXT: ret void
; NO_NEON-LABEL: @load_ptrvec_factor4(
; NO_NEON-NOT: @llvm.aarch64.neon
; NO_NEON: ret void
;
  %interleaved.vec = load <8 x ptr>, ptr %ptr, align 4
  %v0 = shufflevector <8 x ptr> %interleaved.vec, <8 x ptr> poison, <2 x i32> <i32 0, i32 4>
  %v1 = shufflevector <8 x ptr> %interleaved.vec, <8 x ptr> poison, <2 x i32> <i32 1, i32 5>
  %v2 = shufflevector <8 x ptr> %interleaved.vec, <8 x ptr> poison, <2 x i32> <i32 2, i32 6>
  %v3 = shufflevector <8 x ptr> %interleaved.vec, <8 x ptr> poison, <2 x i32> <i32 3, i32 7>
  ret void
}

; Factor-2 store of pointer vectors; the checks expect ptrtoint on both inputs
; followed by st2.v2i64.
define void @store_ptrvec_factor2(ptr %ptr, <2 x ptr> %v0, <2 x ptr> %v1) {
; NEON-LABEL: @store_ptrvec_factor2(
; NEON-NEXT: [[TMP1:%.*]] = ptrtoint <2 x ptr> %v0 to <2 x i64>
; NEON-NEXT: [[TMP2:%.*]] = ptrtoint <2 x ptr> %v1 to <2 x i64>
; NEON-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP2]], <2 x i32> <i32 0, i32 1>
; NEON-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP2]], <2 x i32> <i32 2, i32 3>
; NEON-NEXT: call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> [[TMP3]], <2 x i64> [[TMP4]], ptr %ptr)
; NEON-NEXT: ret void
; NO_NEON-LABEL: @store_ptrvec_factor2(
; NO_NEON-NOT: @llvm.aarch64.neon
; NO_NEON: ret void
;
  %interleaved.vec = shufflevector <2 x ptr> %v0, <2 x ptr> %v1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  store <4 x ptr> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; Factor-3 store of pointer vectors; the checks expect ptrtoint on %s0/%s1
; followed by st3.v2i64.
define void @store_ptrvec_factor3(ptr %ptr, <2 x ptr> %v0, <2 x ptr> %v1, <2 x ptr> %v2) {
; NEON-LABEL: @store_ptrvec_factor3(
; NEON: [[TMP1:%.*]] = ptrtoint <4 x ptr> %s0 to <4 x i64>
; NEON-NEXT: [[TMP2:%.*]] = ptrtoint <4 x ptr> %s1 to <4 x i64>
; NEON-NEXT: [[TMP3:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <2 x i32> <i32 0, i32 1>
; NEON-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <2 x i32> <i32 2, i32 3>
; NEON-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <2 x i32> <i32 4, i32 5>
; NEON-NEXT: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], ptr %ptr)
; NEON-NEXT: ret void
; NO_NEON-LABEL: @store_ptrvec_factor3(
; NO_NEON-NOT: @llvm.aarch64.neon
; NO_NEON: ret void
;
  %s0 = shufflevector <2 x ptr> %v0, <2 x ptr> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %s1 = shufflevector <2 x ptr> %v2, <2 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %interleaved.vec = shufflevector <4 x ptr> %s0, <4 x ptr> %s1, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
  store <6 x ptr> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; Factor-4 store of pointer vectors; the checks expect ptrtoint on %s0/%s1
; followed by st4.v2i64.
define void @store_ptrvec_factor4(ptr %ptr, <2 x ptr> %v0, <2 x ptr> %v1, <2 x ptr> %v2, <2 x ptr> %v3) {
; NEON-LABEL: @store_ptrvec_factor4(
; NEON: [[TMP1:%.*]] = ptrtoint <4 x ptr> %s0 to <4 x i64>
; NEON-NEXT: [[TMP2:%.*]] = ptrtoint <4 x ptr> %s1 to <4 x i64>
; NEON-NEXT: [[TMP3:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <2 x i32> <i32 0, i32 1>
; NEON-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <2 x i32> <i32 2, i32 3>
; NEON-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <2 x i32> <i32 4, i32 5>
; NEON-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP2]], <2 x i32> <i32 6, i32 7>
; NEON-NEXT: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], ptr %ptr)
; NEON-NEXT: ret void
; NO_NEON-LABEL: @store_ptrvec_factor4(
; NO_NEON-NOT: @llvm.aarch64.neon
; NO_NEON: ret void
;
  %s0 = shufflevector <2 x ptr> %v0, <2 x ptr> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %s1 = shufflevector <2 x ptr> %v2, <2 x ptr> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %interleaved.vec = shufflevector <4 x ptr> %s0, <4 x ptr> %s1, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
  store <8 x ptr> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; Factor-2 load whose shuffle masks contain undef elements; ld2.v4i32 is still
; expected with NEON.
define void @load_undef_mask_factor2(ptr %ptr) {
; NEON-LABEL: @load_undef_mask_factor2(
; NEON-NEXT: [[LDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr %ptr)
; NEON-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN]], 1
; NEON-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN]], 0
; NEON-NEXT: ret void
; NO_NEON-LABEL: @load_undef_mask_factor2(
; NO_NEON-NOT: @llvm.aarch64.neon
; NO_NEON: ret void
;
  %interleaved.vec = load <8 x i32>, ptr %ptr, align 4
  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 undef, i32 2, i32 undef, i32 6>
  %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 undef, i32 3, i32 undef, i32 7>
  ret void
}

; Factor-3 load where the last shuffle mask is mostly undef; ld3.v4i32 is
; still expected with NEON.
define void @load_undef_mask_factor3(ptr %ptr) {
; NEON-LABEL: @load_undef_mask_factor3(
; NEON-NEXT: [[LDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr %ptr)
; NEON-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 2
; NEON-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 1
; NEON-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 0
; NEON-NEXT: ret void
; NO_NEON-LABEL: @load_undef_mask_factor3(
; NO_NEON-NOT: @llvm.aarch64.neon
; NO_NEON: ret void
;
  %interleaved.vec = load <12 x i32>, ptr %ptr, align 4
  %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
  %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
  %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
  ret void
}

; Factor-4 load where every mask has undef in its upper half; ld4.v4i32 is
; still expected with NEON.
define void @load_undef_mask_factor4(ptr %ptr) {
; NEON-LABEL: @load_undef_mask_factor4(
; NEON-NEXT: [[LDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0(ptr %ptr)
; NEON-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 3
; NEON-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 2
; NEON-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 1
; NEON-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 0
; NEON-NEXT: ret void
; NO_NEON-LABEL: @load_undef_mask_factor4(
; NO_NEON-NOT: @llvm.aarch64.neon
; NO_NEON: ret void
;
  %interleaved.vec = load <16 x i32>, ptr %ptr, align 4
  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef>
  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>
  %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 2, i32 6, i32 undef, i32 undef>
  %v3 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 3, i32 7, i32 undef, i32 undef>
  ret void
}

; Factor-2 store whose interleave mask starts with four undef elements;
; st2.v4i32 is still expected with NEON.
define void @store_undef_mask_factor2(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1) {
; NEON-LABEL: @store_undef_mask_factor2(
; NEON-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; NEON-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; NEON-NEXT: call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], ptr %ptr)
; NEON-NEXT: ret void
; NO_NEON-LABEL: @store_undef_mask_factor2(
; NO_NEON-NOT: @llvm.aarch64.neon
; NO_NEON: ret void
;
  %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7>
  store <8 x i32> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; Factor-3 store with two undef elements in the interleave mask; st3.v4i32 is
; still expected with NEON.
define void @store_undef_mask_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
; NEON-LABEL: @store_undef_mask_factor3(
; NEON: [[TMP1:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; NEON-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; NEON-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
; NEON-NEXT: call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], ptr %ptr)
; NEON-NEXT: ret void
; NO_NEON-LABEL: @store_undef_mask_factor3(
; NO_NEON-NOT: @llvm.aarch64.neon
; NO_NEON: ret void
;
  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s1 = shufflevector <4 x i32> %v2, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 undef, i32 1, i32 undef, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; Factor-4 store with two undef elements in the interleave mask; st4.v4i32 is
; still expected with NEON.
define void @store_undef_mask_factor4(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
; NEON-LABEL: @store_undef_mask_factor4(
; NEON: [[TMP1:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; NEON-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; NEON-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
; NEON-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> %s0, <8 x i32> %s1, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
; NEON-NEXT: call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], ptr %ptr)
; NEON-NEXT: ret void
; NO_NEON-LABEL: @store_undef_mask_factor4(
; NO_NEON-NOT: @llvm.aarch64.neon
; NO_NEON: ret void
;
  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 undef, i32 undef, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
  store <16 x i32> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; <3 x float> load followed by a shuffle; neither prefix expects a NEON
; intrinsic call.
define void @load_illegal_factor2(ptr %ptr) nounwind {
; NEON-LABEL: @load_illegal_factor2(
; NEON-NOT: @llvm.aarch64.neon
; NEON: ret void
; NO_NEON-LABEL: @load_illegal_factor2(
; NO_NEON-NOT: @llvm.aarch64.neon
; NO_NEON: ret void
;
  %interleaved.vec = load <3 x float>, ptr %ptr, align 16
  %v0 = shufflevector <3 x float> %interleaved.vec, <3 x float> poison, <3 x i32> <i32 0, i32 2, i32 undef>
  ret void
}

; <3 x float> shuffle followed by a store; neither prefix expects a NEON
; intrinsic call.
define void @store_illegal_factor2(ptr %ptr, <3 x float> %v0) nounwind {
; NEON-LABEL: @store_illegal_factor2(
; NEON-NOT: @llvm.aarch64.neon
; NEON: ret void
; NO_NEON-LABEL: @store_illegal_factor2(
; NO_NEON-NOT: @llvm.aarch64.neon
; NO_NEON: ret void
;
  %interleaved.vec = shufflevector <3 x float> %v0, <3 x float> poison, <3 x i32> <i32 0, i32 2, i32 undef>
  store <3 x float> %interleaved.vec, ptr %ptr, align 16
  ret void
}

; Factor-4 store whose mask selects sub-vectors starting at indices 4, 16, 32
; and 8 of the two <32 x i32> inputs; st4.v2i32 is expected with NEON.
define void @store_general_mask_factor4(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
; NEON-LABEL: @store_general_mask_factor4(
; NEON-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 4, i32 5>
; NEON-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 16, i32 17>
; NEON-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 32, i32 33>
; NEON-NEXT: [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 8, i32 9>
; NEON-NEXT: call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], ptr %ptr)
; NEON-NEXT: ret void
; NO_NEON-LABEL: @store_general_mask_factor4(
; NO_NEON-NOT: @llvm.aarch64.neon
; NO_NEON: ret void
;
  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
  store <8 x i32> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; As @store_general_mask_factor4 but with the first mask element undef; the
; same st4.v2i32 operands are expected.
define void @store_general_mask_factor4_undefbeg(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
; NEON-LABEL: @store_general_mask_factor4_undefbeg(
; NEON-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 4, i32 5>
; NEON-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 16, i32 17>
; NEON-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 32, i32 33>
; NEON-NEXT: [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 8, i32 9>
; NEON-NEXT: call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], ptr %ptr)
; NEON-NEXT: ret void
; NO_NEON-LABEL: @store_general_mask_factor4_undefbeg(
; NO_NEON-NOT: @llvm.aarch64.neon
; NO_NEON: ret void
;
  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 undef, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9>
  store <8 x i32> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; As @store_general_mask_factor4 but with the last mask element undef; the
; same st4.v2i32 operands are expected.
define void @store_general_mask_factor4_undefend(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
; NEON-LABEL: @store_general_mask_factor4_undefend(
; NEON-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 4, i32 5>
; NEON-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 16, i32 17>
; NEON-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 32, i32 33>
; NEON-NEXT: [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 8, i32 9>
; NEON-NEXT: call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], ptr %ptr)
; NEON-NEXT: ret void
; NO_NEON-LABEL: @store_general_mask_factor4_undefend(
; NO_NEON-NOT: @llvm.aarch64.neon
; NO_NEON: ret void
;
  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 undef>
  store <8 x i32> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; As @store_general_mask_factor4 but with undef elements in the middle of the
; mask; the same st4.v2i32 operands are expected.
define void @store_general_mask_factor4_undefmid(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
; NEON-LABEL: @store_general_mask_factor4_undefmid(
; NEON-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 4, i32 5>
; NEON-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 16, i32 17>
; NEON-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 32, i32 33>
; NEON-NEXT: [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 8, i32 9>
; NEON-NEXT: call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], ptr %ptr)
; NEON-NEXT: ret void
; NO_NEON-LABEL: @store_general_mask_factor4_undefmid(
; NO_NEON-NOT: @llvm.aarch64.neon
; NO_NEON: ret void
;
  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 undef, i32 32, i32 8, i32 5, i32 17, i32 undef, i32 9>
  store <8 x i32> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; Factor-4 store where the second and third interleave lanes are entirely
; undef; the checks expect those operands to be taken from indices 0 and 1.
define void @store_general_mask_factor4_undefmulti(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
; NEON-LABEL: @store_general_mask_factor4_undefmulti(
; NEON-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 4, i32 5>
; NEON-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 0, i32 1>
; NEON-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 0, i32 1>
; NEON-NEXT: [[TMP4:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <2 x i32> <i32 8, i32 9>
; NEON-NEXT: call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], ptr %ptr)
; NEON-NEXT: ret void
; NO_NEON-LABEL: @store_general_mask_factor4_undefmulti(
; NO_NEON-NOT: @llvm.aarch64.neon
; NO_NEON: ret void
;
  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 undef, i32 undef, i32 8, i32 undef, i32 undef, i32 undef, i32 9>
  store <8 x i32> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; Factor-3 store whose mask selects sub-vectors starting at indices 4, 32 and
; 16 of the two <32 x i32> inputs; st3.v4i32 is expected with NEON.
define void @store_general_mask_factor3(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
; NEON-LABEL: @store_general_mask_factor3(
; NEON-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; NEON-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 32, i32 33, i32 34, i32 35>
; NEON-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
; NEON-NEXT: call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], ptr %ptr)
; NEON-NEXT: ret void
; NO_NEON-LABEL: @store_general_mask_factor3(
; NO_NEON-NOT: @llvm.aarch64.neon
; NO_NEON: ret void
;
  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 5, i32 33, i32 17, i32 6, i32 34, i32 18, i32 7, i32 35, i32 19>
  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; As @store_general_mask_factor3 but with the two middle elements of the first
; lane undef; the same st3.v4i32 operands are expected.
define void @store_general_mask_factor3_undefmultimid(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
; NEON-LABEL: @store_general_mask_factor3_undefmultimid(
; NEON-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; NEON-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 32, i32 33, i32 34, i32 35>
; NEON-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
; NEON-NEXT: call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], ptr %ptr)
; NEON-NEXT: ret void
; NO_NEON-LABEL: @store_general_mask_factor3_undefmultimid(
; NO_NEON-NOT: @llvm.aarch64.neon
; NO_NEON: ret void
;
  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 7, i32 35, i32 19>
  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; Same shape as the undefmultimid case, but the last defined element of the
; first lane is 8 rather than 7, so the lane is not consecutive; no NEON
; intrinsic is expected.
define void @store_general_mask_factor3_undef_fail(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
; NEON-LABEL: @store_general_mask_factor3_undef_fail(
; NEON-NOT: @llvm.aarch64.neon
; NEON: ret void
; NO_NEON-LABEL: @store_general_mask_factor3_undef_fail(
; NO_NEON-NOT: @llvm.aarch64.neon
; NO_NEON: ret void
;
  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 8, i32 35, i32 19>
  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; Factor-3 store where the whole first lane is undef; the checks expect that
; operand to be taken from indices 0..3.
define void @store_general_mask_factor3_undeflane(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
; NEON-LABEL: @store_general_mask_factor3_undeflane(
; NEON-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; NEON-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 32, i32 33, i32 34, i32 35>
; NEON-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> %v0, <32 x i32> %v1, <4 x i32> <i32 16, i32 17, i32 18, i32 19>
; NEON-NEXT: call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], ptr %ptr)
; NEON-NEXT: ret void
; NO_NEON-LABEL: @store_general_mask_factor3_undeflane(
; NO_NEON-NOT: @llvm.aarch64.neon
; NO_NEON: ret void
;
  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; The only defined element of the first lane is index 2 at position 3, which
; would imply a start index of -1; no NEON intrinsic is expected.
define void @store_general_mask_factor3_negativestart(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
; NEON-LABEL: @store_general_mask_factor3_negativestart(
; NEON-NOT: @llvm.aarch64.neon
; NEON: ret void
; NO_NEON-LABEL: @store_general_mask_factor3_negativestart(
; NO_NEON-NOT: @llvm.aarch64.neon
; NO_NEON: ret void
;
  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 2, i32 35, i32 19>
  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
  ret void
}

@g = external global <4 x float>

; The following does not give a valid interleaved store
; NEON-LABEL: define void @no_interleave
; NEON-NOT: call void @llvm.aarch64.neon.st2
; NEON: shufflevector
; NEON: store
; NEON: ret void
; NO_NEON-LABEL: define void @no_interleave
; NO_NEON: shufflevector
; NO_NEON: store
; NO_NEON: ret void
define void @no_interleave(<4 x float> %a0) {
  %v0 = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> <i32 0, i32 3, i32 7, i32 undef>
  store <4 x float> %v0, ptr @g, align 16
  ret void
}

; Factor-2 load of <16 x i32>; the checks expect two ld2.v4i32 calls (the
; second at an offset of 8 i32 elements) whose results are concatenated.
define void @load_factor2_wide2(ptr %ptr) {
; NEON-LABEL: @load_factor2_wide2(
; NEON-NEXT: [[LDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr %ptr)
; NEON-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN]], 1
; NEON-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN]], 0
; NEON-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr %ptr, i32 8
; NEON-NEXT: [[LDN1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr [[TMP5]])
; NEON-NEXT: [[TMP7:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN1]], 1
; NEON-NEXT: [[TMP8:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN1]], 0
; NEON-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; NEON-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; NEON-NEXT: ret void
; NO_NEON-LABEL: @load_factor2_wide2(
; NO_NEON-NOT: @llvm.aarch64.neon
; NO_NEON: ret void
;
  %interleaved.vec = load <16 x i32>, ptr %ptr, align 4
  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  ret void
}

; Factor-2 load of <24 x i32>; the checks expect three ld2.v4i32 calls whose
; results are concatenated into <12 x i32> values.
define void @load_factor2_wide3(ptr %ptr) {
; NEON-LABEL: @load_factor2_wide3(
; NEON-NEXT: [[LDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr [[PTR:%.*]])
; NEON-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN]], 1
; NEON-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN]], 0
; NEON-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[PTR]], i32 8
; NEON-NEXT: [[LDN1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr [[TMP5]])
; NEON-NEXT: [[TMP7:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN1]], 1
; NEON-NEXT: [[TMP8:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN1]], 0
; NEON-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP5]], i32 8
; NEON-NEXT: [[LDN2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr [[TMP9]])
; NEON-NEXT: [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN2]], 1
; NEON-NEXT: [[TMP12:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN2]], 0
; NEON-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; NEON-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; NEON-NEXT: [[TMP15:%.*]] = shufflevector <8 x i32> [[TMP13]], <8 x i32> [[TMP14]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
; NEON-NEXT: [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; NEON-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; NEON-NEXT: [[TMP18:%.*]] = shufflevector <8 x i32> [[TMP16]], <8 x i32> [[TMP17]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
; NEON-NEXT: ret void
; NO_NEON-LABEL: @load_factor2_wide3(
; NO_NEON-NOT: @llvm.aarch64.neon
; NO_NEON: ret void
;
  %interleaved.vec = load <24 x i32>, ptr %ptr, align 4
  %v0 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> poison, <12 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22>
  %v1 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> 
poison, <12 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23> 594 ret void 595} 596 597define void @load_factor3_wide(ptr %ptr) { 598; NEON-LABEL: @load_factor3_wide( 599; NEON-NEXT: [[LDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr %ptr) 600; NEON-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 2 601; NEON-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 1 602; NEON-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 0 603; NEON-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr %ptr, i32 12 604; NEON-NEXT: [[LDN1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr [[TMP6]]) 605; NEON-NEXT: [[TMP8:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN1]], 2 606; NEON-NEXT: [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN1]], 1 607; NEON-NEXT: [[TMP10:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[LDN1]], 0 608; NEON-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 609; NEON-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 610; NEON-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 611; NEON-NEXT: ret void 612; NO_NEON-LABEL: @load_factor3_wide( 613; NO_NEON-NOT: @llvm.aarch64.neon 614; NO_NEON: ret void 615; 616 %interleaved.vec = load <24 x i32>, ptr %ptr, align 4 617 %v0 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21> 618 %v1 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> poison, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22> 
619 %v2 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> poison, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23> 620 ret void 621} 622 623define void @load_factor4_wide(ptr %ptr) { 624; NEON-LABEL: @load_factor4_wide( 625; NEON-NEXT: [[LDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0(ptr %ptr) 626; NEON-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 3 627; NEON-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 2 628; NEON-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 1 629; NEON-NEXT: [[TMP6:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN]], 0 630; NEON-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr %ptr, i32 16 631; NEON-NEXT: [[LDN1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0(ptr [[TMP7]]) 632; NEON-NEXT: [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN1]], 3 633; NEON-NEXT: [[TMP10:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN1]], 2 634; NEON-NEXT: [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN1]], 1 635; NEON-NEXT: [[TMP12:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[LDN1]], 0 636; NEON-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 637; NEON-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 638; NEON-NEXT: [[TMP15:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 639; NEON-NEXT: [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, 
i32 7> 640; NEON-NEXT: ret void 641; NO_NEON-LABEL: @load_factor4_wide( 642; NO_NEON-NOT: @llvm.aarch64.neon 643; NO_NEON: ret void 644; 645 %interleaved.vec = load <32 x i32>, ptr %ptr, align 4 646 %v0 = shufflevector <32 x i32> %interleaved.vec, <32 x i32> poison, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28> 647 %v1 = shufflevector <32 x i32> %interleaved.vec, <32 x i32> poison, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29> 648 %v2 = shufflevector <32 x i32> %interleaved.vec, <32 x i32> poison, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30> 649 %v3 = shufflevector <32 x i32> %interleaved.vec, <32 x i32> poison, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31> 650 ret void 651} 652 653define void @store_factor2_wide(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1) { 654; NEON-LABEL: @store_factor2_wide( 655; NEON-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> %v0, <8 x i32> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 656; NEON-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> %v0, <8 x i32> %v1, <4 x i32> <i32 8, i32 9, i32 10, i32 11> 657; NEON-NEXT: call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[TMP2]], <4 x i32> [[TMP3]], ptr %ptr) 658; NEON-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> %v0, <8 x i32> %v1, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 659; NEON-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> %v0, <8 x i32> %v1, <4 x i32> <i32 12, i32 13, i32 14, i32 15> 660; NEON-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr %ptr, i32 8 661; NEON-NEXT: call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[TMP5]], <4 x i32> [[TMP6]], ptr [[TMP7]]) 662; NEON-NEXT: ret void 663; NO_NEON-LABEL: @store_factor2_wide( 664; NO_NEON: ret void 665; 666 %interleaved.vec = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> 667 store <16 x i32> %interleaved.vec, ptr %ptr, 
align 4 668 ret void 669} 670 671define void @store_factor3_wide(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1, <8 x i32> %v2) { 672; NEON-LABEL: @store_factor3_wide( 673; NEON: [[TMP2:%.*]] = shufflevector <16 x i32> %s0, <16 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 674; NEON-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> %s0, <16 x i32> %s1, <4 x i32> <i32 8, i32 9, i32 10, i32 11> 675; NEON-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> %s0, <16 x i32> %s1, <4 x i32> <i32 16, i32 17, i32 18, i32 19> 676; NEON-NEXT: call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], ptr %ptr) 677; NEON-NEXT: [[TMP6:%.*]] = shufflevector <16 x i32> %s0, <16 x i32> %s1, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 678; NEON-NEXT: [[TMP7:%.*]] = shufflevector <16 x i32> %s0, <16 x i32> %s1, <4 x i32> <i32 12, i32 13, i32 14, i32 15> 679; NEON-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> %s0, <16 x i32> %s1, <4 x i32> <i32 20, i32 21, i32 22, i32 23> 680; NEON-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr %ptr, i32 12 681; NEON-NEXT: call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> [[TMP8]], ptr [[TMP9]]) 682; NEON-NEXT: ret void 683; NO_NEON-LABEL: @store_factor3_wide( 684; NO_NEON: ret void 685; 686 %s0 = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 687 %s1 = shufflevector <8 x i32> %v2, <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 688 %interleaved.vec = shufflevector <16 x i32> %s0, <16 x i32> %s1, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23> 689 store <24 x i32> %interleaved.vec, ptr %ptr, 
align 4 690 ret void 691} 692 693define void @store_factor4_wide(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1, <8 x i32> %v2, <8 x i32> %v3) { 694; NEON-LABEL: @store_factor4_wide( 695; NEON: [[TMP2:%.*]] = shufflevector <16 x i32> %s0, <16 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 696; NEON-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> %s0, <16 x i32> %s1, <4 x i32> <i32 8, i32 9, i32 10, i32 11> 697; NEON-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> %s0, <16 x i32> %s1, <4 x i32> <i32 16, i32 17, i32 18, i32 19> 698; NEON-NEXT: [[TMP5:%.*]] = shufflevector <16 x i32> %s0, <16 x i32> %s1, <4 x i32> <i32 24, i32 25, i32 26, i32 27> 699; NEON-NEXT: call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], ptr %ptr) 700; NEON-NEXT: [[TMP7:%.*]] = shufflevector <16 x i32> %s0, <16 x i32> %s1, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 701; NEON-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> %s0, <16 x i32> %s1, <4 x i32> <i32 12, i32 13, i32 14, i32 15> 702; NEON-NEXT: [[TMP9:%.*]] = shufflevector <16 x i32> %s0, <16 x i32> %s1, <4 x i32> <i32 20, i32 21, i32 22, i32 23> 703; NEON-NEXT: [[TMP10:%.*]] = shufflevector <16 x i32> %s0, <16 x i32> %s1, <4 x i32> <i32 28, i32 29, i32 30, i32 31> 704; NEON-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr %ptr, i32 16 705; NEON-NEXT: call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> [[TMP10]], ptr [[TMP11]]) 706; NEON-NEXT: ret void 707; NO_NEON-LABEL: @store_factor4_wide( 708; NO_NEON-NOT: @llvm.aarch64.neon 709; NO_NEON: ret void 710; 711 %s0 = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 712 %s1 = shufflevector <8 x i32> %v2, <8 x i32> %v3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 713 
%interleaved.vec = shufflevector <16 x i32> %s0, <16 x i32> %s1, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31> 714 store <32 x i32> %interleaved.vec, ptr %ptr, align 4 715 ret void 716} 717 718define void @load_factor2_fp128(ptr %ptr) { 719; NEON-LABEL: @load_factor2_fp128( 720; NEON-NOT: @llvm.aarch64.neon 721; NEON: ret void 722; NO_NEON-LABEL: @load_factor2_fp128( 723; NO_NEON-NOT: @llvm.aarch64.neon 724; NO_NEON: ret void 725; 726 %interleaved.vec = load <4 x fp128>, ptr %ptr, align 16 727 %v0 = shufflevector <4 x fp128> %interleaved.vec, <4 x fp128> poison, <2 x i32> <i32 0, i32 2> 728 %v1 = shufflevector <4 x fp128> %interleaved.vec, <4 x fp128> poison, <2 x i32> <i32 1, i32 3> 729 ret void 730} 731 732define <4 x i1> @load_large_vector(ptr %p) { 733; NEON-LABEL: @load_large_vector( 734; NEON: [[LDN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr 735; NEON-NEXT: [[TMP1:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 1 736; NEON-NEXT: [[TMP2:%.*]] = inttoptr <2 x i64> [[TMP1]] to <2 x ptr> 737; NEON-NEXT: [[TMP3:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[LDN]], 0 738; NEON-NEXT: [[TMP4:%.*]] = inttoptr <2 x i64> [[TMP3]] to <2 x ptr> 739; NEON: [[LDN1:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr 740; NEON-NEXT: [[TMP5:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[LDN1]], 1 741; NEON-NEXT: [[TMP6:%.*]] = inttoptr <2 x i64> [[TMP5]] to <2 x ptr> 742; NEON-NEXT: [[TMP7:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[LDN1]], 0 743; NEON-NEXT: [[TMP8:%.*]] = inttoptr <2 x i64> [[TMP7]] to <2 x ptr> 744; NEON-NEXT: shufflevector <2 x ptr> [[TMP2]], <2 x ptr> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 745; NEON-NEXT: 
shufflevector <2 x ptr> [[TMP4]], <2 x ptr> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 746; NO_NEON-LABEL: @load_large_vector( 747; NO_NEON-NOT: @llvm.aarch64.neon 748; NO_NEON: ret 749; 750 %l = load <12 x ptr>, ptr %p 751 %s1 = shufflevector <12 x ptr> %l, <12 x ptr> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9> 752 %s2 = shufflevector <12 x ptr> %l, <12 x ptr> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10> 753 %ret = icmp ne <4 x ptr> %s1, %s2 754 ret <4 x i1> %ret 755} 756