; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mattr=+neon -interleaved-access -S | FileCheck %s --check-prefix=CHECK-NEON
; RUN: opt < %s -mattr=+mve.fp -interleaved-access -S | FileCheck %s --check-prefix=CHECK-MVE
; RUN: opt < %s -interleaved-access -S | FileCheck %s --check-prefix=CHECK-NONE
; RUN: opt < %s -mattr=+neon -passes=interleaved-access -S | FileCheck %s --check-prefix=CHECK-NEON
; RUN: opt < %s -mattr=+mve.fp -passes=interleaved-access -S | FileCheck %s --check-prefix=CHECK-MVE
; RUN: opt < %s -passes=interleaved-access -S | FileCheck %s --check-prefix=CHECK-NONE

; Exercises the interleaved-access pass on a 32-bit ARM triple in three
; configurations: with +neon, with +mve.fp, and with no -mattr at all. Each
; configuration is run twice, once with the "-interleaved-access" spelling and
; once with the "-passes=interleaved-access" spelling, and both spellings share
; the same set of expectations.
;
; The expectation blocks inside every function are autogenerated; regenerate
; them with utils/update_test_checks.py instead of editing them by hand.

target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
target triple = "arm---eabi"

; Factor-2 de-interleaving load of <16 x i8> into two <8 x i8> halves.
; With NEON the expectations show a single llvm.arm.neon.vld2 call followed by
; extractvalues; with MVE or no vector extension the load and both shuffles are
; expected to be left as written.
define void @load_factor2(ptr %ptr) {
; CHECK-NEON-LABEL: @load_factor2(
; CHECK-NEON-NEXT: [[VLDN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8.p0(ptr [[PTR:%.*]], i32 4)
; CHECK-NEON-NEXT: [[TMP2:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLDN]], 1
; CHECK-NEON-NEXT: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLDN]], 0
; CHECK-NEON-NEXT: ret void
;
; CHECK-MVE-LABEL: @load_factor2(
; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <16 x i8>, ptr [[PTR:%.*]], align 4
; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <16 x i8> [[INTERLEAVED_VEC]], <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <16 x i8> [[INTERLEAVED_VEC]], <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; CHECK-MVE-NEXT: ret void
;
; CHECK-NONE-LABEL: @load_factor2(
; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <16 x i8>, ptr [[PTR:%.*]], align 4
; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <16 x i8> [[INTERLEAVED_VEC]], <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <16 x i8> [[INTERLEAVED_VEC]], <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; CHECK-NONE-NEXT: ret void
;
  %interleaved.vec = load <16 x i8>, ptr %ptr, align 4
  %v0 = shufflevector <16 x i8> %interleaved.vec, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %v1 = shufflevector <16 x i8> %interleaved.vec, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  ret void
}

; Factor-3 de-interleaving load of <6 x i32> into three <2 x i32> vectors.
; With NEON the expectations show an llvm.arm.neon.vld3 call; with MVE or no
; vector extension the IR is expected to be unchanged.
define void @load_factor3(ptr %ptr) {
; CHECK-NEON-LABEL: @load_factor3(
; CHECK-NEON-NEXT: [[VLDN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32.p0(ptr [[PTR:%.*]], i32 4)
; CHECK-NEON-NEXT: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 2
; CHECK-NEON-NEXT: [[TMP3:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 1
; CHECK-NEON-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 0
; CHECK-NEON-NEXT: ret void
;
; CHECK-MVE-LABEL: @load_factor3(
; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <6 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <6 x i32> [[INTERLEAVED_VEC]], <6 x i32> undef, <2 x i32> <i32 0, i32 3>
; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <6 x i32> [[INTERLEAVED_VEC]], <6 x i32> undef, <2 x i32> <i32 1, i32 4>
; CHECK-MVE-NEXT: [[V2:%.*]] = shufflevector <6 x i32> [[INTERLEAVED_VEC]], <6 x i32> undef, <2 x i32> <i32 2, i32 5>
; CHECK-MVE-NEXT: ret void
;
; CHECK-NONE-LABEL: @load_factor3(
; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <6 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <6 x i32> [[INTERLEAVED_VEC]], <6 x i32> undef, <2 x i32> <i32 0, i32 3>
; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <6 x i32> [[INTERLEAVED_VEC]], <6 x i32> undef, <2 x i32> <i32 1, i32 4>
; CHECK-NONE-NEXT: [[V2:%.*]] = shufflevector <6 x i32> [[INTERLEAVED_VEC]], <6 x i32> undef, <2 x i32> <i32 2, i32 5>
; CHECK-NONE-NEXT: ret void
;
  %interleaved.vec = load <6 x i32>, ptr %ptr, align 4
  %v0 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> undef, <2 x i32> <i32 0, i32 3>
  %v1 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> undef, <2 x i32> <i32 1, i32 4>
  %v2 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> undef, <2 x i32> <i32 2, i32 5>
  ret void
}

; Factor-4 de-interleaving load of <16 x i32> into four <4 x i32> vectors.
; With NEON the expectations show an llvm.arm.neon.vld4 call; with MVE or no
; vector extension the IR is expected to be unchanged.
define void @load_factor4(ptr %ptr) {
; CHECK-NEON-LABEL: @load_factor4(
; CHECK-NEON-NEXT: [[VLDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32.p0(ptr [[PTR:%.*]], i32 4)
; CHECK-NEON-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 3
; CHECK-NEON-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 2
; CHECK-NEON-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 1
; CHECK-NEON-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 0
; CHECK-NEON-NEXT: ret void
;
; CHECK-MVE-LABEL: @load_factor4(
; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <16 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
; CHECK-MVE-NEXT: [[V2:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
; CHECK-MVE-NEXT: [[V3:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
; CHECK-MVE-NEXT: ret void
;
; CHECK-NONE-LABEL: @load_factor4(
; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <16 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
; CHECK-NONE-NEXT: [[V2:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
; CHECK-NONE-NEXT: [[V3:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
; CHECK-NONE-NEXT: ret void
;
  %interleaved.vec = load <16 x i32>, ptr %ptr, align 4
  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
  %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
  %v3 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
  ret void
}

; Factor-2 interleaving store of two <8 x i8> vectors.
; With NEON the expectations show the operands being split back out with
; shuffles and passed to llvm.arm.neon.vst2; with MVE or no vector extension
; the interleaving shuffle and the store are expected to be unchanged.
define void @store_factor2(ptr %ptr, <8 x i8> %v0, <8 x i8> %v1) {
; CHECK-NEON-LABEL: @store_factor2(
; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> [[V0:%.*]], <8 x i8> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> [[V0]], <8 x i8> [[V1]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst2.p0.v8i8(ptr [[PTR:%.*]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], i32 4)
; CHECK-NEON-NEXT: ret void
;
; CHECK-MVE-LABEL: @store_factor2(
; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[V0:%.*]], <8 x i8> [[V1:%.*]], <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
; CHECK-MVE-NEXT: store <16 x i8> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-MVE-NEXT: ret void
;
; CHECK-NONE-LABEL: @store_factor2(
; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[V0:%.*]], <8 x i8> [[V1:%.*]], <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
; CHECK-NONE-NEXT: store <16 x i8> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-NONE-NEXT: ret void
;
  %interleaved.vec = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  store <16 x i8> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; Factor-3 interleaving store of three <4 x i32> vectors that are first
; concatenated into two <8 x i32> values (the second padded with undef lanes).
; With NEON the expectations show three extracting shuffles feeding
; llvm.arm.neon.vst3; with MVE or no vector extension the interleaving shuffle
; and the store are expected to remain.
define void @store_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
; CHECK-NEON-LABEL: @store_factor3(
; CHECK-NEON-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEON-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst3.p0.v4i32(ptr [[PTR:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 4)
; CHECK-NEON-NEXT: ret void
;
; CHECK-MVE-LABEL: @store_factor3(
; CHECK-MVE-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-MVE-NEXT: ret void
;
; CHECK-NONE-LABEL: @store_factor3(
; CHECK-NONE-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-NONE-NEXT: ret void
;
  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s1 = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; Factor-4 interleaving store of four <4 x i32> vectors, concatenated pairwise
; into two <8 x i32> values before interleaving.
; With NEON the expectations show four extracting shuffles feeding
; llvm.arm.neon.vst4; with MVE or no vector extension the interleaving shuffle
; and the store are expected to remain.
define void @store_factor4(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
; CHECK-NEON-LABEL: @store_factor4(
; CHECK-NEON-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEON-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
; CHECK-NEON-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst4.p0.v4i32(ptr [[PTR:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], i32 4)
; CHECK-NEON-NEXT: ret void
;
; CHECK-MVE-LABEL: @store_factor4(
; CHECK-MVE-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
; CHECK-MVE-NEXT: store <16 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-MVE-NEXT: ret void
;
; CHECK-NONE-LABEL: @store_factor4(
; CHECK-NONE-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
; CHECK-NONE-NEXT: store <16 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-NONE-NEXT: ret void
;
  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
  store <16 x i32> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; Factor-2 load of a vector of pointers where only the even-indexed lanes are
; extracted (a single shuffle). With NEON the expectations show an i32-typed
; llvm.arm.neon.vld2 call, one extractvalue and an inttoptr back to <2 x ptr>;
; with MVE or no vector extension the IR is expected to be unchanged.
define void @load_ptrvec_factor2(ptr %ptr) {
; CHECK-NEON-LABEL: @load_ptrvec_factor2(
; CHECK-NEON-NEXT: [[VLDN:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32.p0(ptr [[PTR:%.*]], i32 4)
; CHECK-NEON-NEXT: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[VLDN]], 0
; CHECK-NEON-NEXT: [[TMP3:%.*]] = inttoptr <2 x i32> [[TMP2]] to <2 x ptr>
; CHECK-NEON-NEXT: ret void
;
; CHECK-MVE-LABEL: @load_ptrvec_factor2(
; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <4 x ptr>, ptr [[PTR:%.*]], align 4
; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <4 x ptr> [[INTERLEAVED_VEC]], <4 x ptr> undef, <2 x i32> <i32 0, i32 2>
; CHECK-MVE-NEXT: ret void
;
; CHECK-NONE-LABEL: @load_ptrvec_factor2(
; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <4 x ptr>, ptr [[PTR:%.*]], align 4
; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <4 x ptr> [[INTERLEAVED_VEC]], <4 x ptr> undef, <2 x i32> <i32 0, i32 2>
; CHECK-NONE-NEXT: ret void
;
  %interleaved.vec = load <4 x ptr>, ptr %ptr, align 4
  %v0 = shufflevector <4 x ptr> %interleaved.vec, <4 x ptr> undef, <2 x i32> <i32 0, i32 2>
  ret void
}

; Factor-3 de-interleaving load of a <6 x ptr> vector. With NEON the
; expectations show an i32-typed llvm.arm.neon.vld3 call whose three results
; are each converted back with inttoptr; with MVE or no vector extension the
; IR is expected to be unchanged.
define void @load_ptrvec_factor3(ptr %ptr) {
; CHECK-NEON-LABEL: @load_ptrvec_factor3(
; CHECK-NEON-NEXT: [[VLDN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32.p0(ptr [[PTR:%.*]], i32 4)
; CHECK-NEON-NEXT: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 2
; CHECK-NEON-NEXT: [[TMP3:%.*]] = inttoptr <2 x i32> [[TMP2]] to <2 x ptr>
; CHECK-NEON-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 1
; CHECK-NEON-NEXT: [[TMP5:%.*]] = inttoptr <2 x i32> [[TMP4]] to <2 x ptr>
; CHECK-NEON-NEXT: [[TMP6:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 0
; CHECK-NEON-NEXT: [[TMP7:%.*]] = inttoptr <2 x i32> [[TMP6]] to <2 x ptr>
; CHECK-NEON-NEXT: ret void
;
; CHECK-MVE-LABEL: @load_ptrvec_factor3(
; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <6 x ptr>, ptr [[PTR:%.*]], align 4
; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <6 x ptr> [[INTERLEAVED_VEC]], <6 x ptr> undef, <2 x i32> <i32 0, i32 3>
; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <6 x ptr> [[INTERLEAVED_VEC]], <6 x ptr> undef, <2 x i32> <i32 1, i32 4>
; CHECK-MVE-NEXT: [[V2:%.*]] = shufflevector <6 x ptr> [[INTERLEAVED_VEC]], <6 x ptr> undef, <2 x i32> <i32 2, i32 5>
; CHECK-MVE-NEXT: ret void
;
; CHECK-NONE-LABEL: @load_ptrvec_factor3(
; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <6 x ptr>, ptr [[PTR:%.*]], align 4
; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <6 x ptr> [[INTERLEAVED_VEC]], <6 x ptr> undef, <2 x i32> <i32 0, i32 3>
; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <6 x ptr> [[INTERLEAVED_VEC]], <6 x ptr> undef, <2 x i32> <i32 1, i32 4>
; CHECK-NONE-NEXT: [[V2:%.*]] = shufflevector <6 x ptr> [[INTERLEAVED_VEC]], <6 x ptr> undef, <2 x i32> <i32 2, i32 5>
; CHECK-NONE-NEXT: ret void
;
  %interleaved.vec = load <6 x ptr>, ptr %ptr, align 4
  %v0 = shufflevector <6 x ptr> %interleaved.vec, <6 x ptr> undef, <2 x i32> <i32 0, i32 3>
  %v1 = shufflevector <6 x ptr> %interleaved.vec, <6 x ptr> undef, <2 x i32> <i32 1, i32 4>
  %v2 = shufflevector <6 x ptr> %interleaved.vec, <6 x ptr> undef, <2 x i32> <i32 2, i32 5>
  ret void
}

; Factor-4 de-interleaving load of an <8 x ptr> vector. With NEON the
; expectations show an i32-typed llvm.arm.neon.vld4 call whose four results
; are each converted back with inttoptr; with MVE or no vector extension the
; IR is expected to be unchanged.
define void @load_ptrvec_factor4(ptr %ptr) {
; CHECK-NEON-LABEL: @load_ptrvec_factor4(
; CHECK-NEON-NEXT: [[VLDN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32.p0(ptr [[PTR:%.*]], i32 4)
; CHECK-NEON-NEXT: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 3
; CHECK-NEON-NEXT: [[TMP3:%.*]] = inttoptr <2 x i32> [[TMP2]] to <2 x ptr>
; CHECK-NEON-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 2
; CHECK-NEON-NEXT: [[TMP5:%.*]] = inttoptr <2 x i32> [[TMP4]] to <2 x ptr>
; CHECK-NEON-NEXT: [[TMP6:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 1
; CHECK-NEON-NEXT: [[TMP7:%.*]] = inttoptr <2 x i32> [[TMP6]] to <2 x ptr>
; CHECK-NEON-NEXT: [[TMP8:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 0
; CHECK-NEON-NEXT: [[TMP9:%.*]] = inttoptr <2 x i32> [[TMP8]] to <2 x ptr>
; CHECK-NEON-NEXT: ret void
;
; CHECK-MVE-LABEL: @load_ptrvec_factor4(
; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <8 x ptr>, ptr [[PTR:%.*]], align 4
; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <8 x ptr> [[INTERLEAVED_VEC]], <8 x ptr> undef, <2 x i32> <i32 0, i32 4>
; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <8 x ptr> [[INTERLEAVED_VEC]], <8 x ptr> undef, <2 x i32> <i32 1, i32 5>
; CHECK-MVE-NEXT: [[V2:%.*]] = shufflevector <8 x ptr> [[INTERLEAVED_VEC]], <8 x ptr> undef, <2 x i32> <i32 2, i32 6>
; CHECK-MVE-NEXT: [[V3:%.*]] = shufflevector <8 x ptr> [[INTERLEAVED_VEC]], <8 x ptr> undef, <2 x i32> <i32 3, i32 7>
; CHECK-MVE-NEXT: ret void
;
; CHECK-NONE-LABEL: @load_ptrvec_factor4(
; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <8 x ptr>, ptr [[PTR:%.*]], align 4
; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <8 x ptr> [[INTERLEAVED_VEC]], <8 x ptr> undef, <2 x i32> <i32 0, i32 4>
; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <8 x ptr> [[INTERLEAVED_VEC]], <8 x ptr> undef, <2 x i32> <i32 1, i32 5>
; CHECK-NONE-NEXT: [[V2:%.*]] = shufflevector <8 x ptr> [[INTERLEAVED_VEC]], <8 x ptr> undef, <2 x i32> <i32 2, i32 6>
; CHECK-NONE-NEXT: [[V3:%.*]] = shufflevector <8 x ptr> [[INTERLEAVED_VEC]], <8 x ptr> undef, <2 x i32> <i32 3, i32 7>
; CHECK-NONE-NEXT: ret void
;
  %interleaved.vec = load <8 x ptr>, ptr %ptr, align 4
  %v0 = shufflevector <8 x ptr> %interleaved.vec, <8 x ptr> undef, <2 x i32> <i32 0, i32 4>
  %v1 = shufflevector <8 x ptr> %interleaved.vec, <8 x ptr> undef, <2 x i32> <i32 1, i32 5>
  %v2 = shufflevector <8 x ptr> %interleaved.vec, <8 x ptr> undef, <2 x i32> <i32 2, i32 6>
  %v3 = shufflevector <8 x ptr> %interleaved.vec, <8 x ptr> undef, <2 x i32> <i32 3, i32 7>
  ret void
}

; Factor-2 interleaving store of two <2 x ptr> vectors. With NEON the
; expectations show both operands converted with ptrtoint, split with
; shuffles and passed to an i32-typed llvm.arm.neon.vst2; with MVE or no
; vector extension the interleaving shuffle and the store are expected to
; remain.
define void @store_ptrvec_factor2(ptr %ptr, <2 x ptr> %v0, <2 x ptr> %v1) {
; CHECK-NEON-LABEL: @store_ptrvec_factor2(
; CHECK-NEON-NEXT: [[TMP1:%.*]] = ptrtoint <2 x ptr> [[V0:%.*]] to <2 x i32>
; CHECK-NEON-NEXT: [[TMP2:%.*]] = ptrtoint <2 x ptr> [[V1:%.*]] to <2 x i32>
; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> <i32 0, i32 1>
; CHECK-NEON-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> <i32 2, i32 3>
; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst2.p0.v2i32(ptr [[PTR:%.*]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], i32 4)
; CHECK-NEON-NEXT: ret void
;
; CHECK-MVE-LABEL: @store_ptrvec_factor2(
; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <2 x ptr> [[V0:%.*]], <2 x ptr> [[V1:%.*]], <4 x i32> <i32 0, i32 2, i32 1, i32 3>
; CHECK-MVE-NEXT: store <4 x ptr> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-MVE-NEXT: ret void
;
; CHECK-NONE-LABEL: @store_ptrvec_factor2(
; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <2 x ptr> [[V0:%.*]], <2 x ptr> [[V1:%.*]], <4 x i32> <i32 0, i32 2, i32 1, i32 3>
; CHECK-NONE-NEXT: store <4 x ptr> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-NONE-NEXT: ret void
;
  %interleaved.vec = shufflevector <2 x ptr> %v0, <2 x ptr> %v1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  store <4 x ptr> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; Factor-3 interleaving store of three <2 x ptr> vectors (the third padded
; with undef lanes during concatenation). With NEON the expectations show the
; concatenated values converted with ptrtoint, split with shuffles and passed
; to an i32-typed llvm.arm.neon.vst3; with MVE or no vector extension the
; interleaving shuffle and the store are expected to remain.
define void @store_ptrvec_factor3(ptr %ptr, <2 x ptr> %v0, <2 x ptr> %v1, <2 x ptr> %v2) {
; CHECK-NEON-LABEL: @store_ptrvec_factor3(
; CHECK-NEON-NEXT: [[S0:%.*]] = shufflevector <2 x ptr> [[V0:%.*]], <2 x ptr> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEON-NEXT: [[S1:%.*]] = shufflevector <2 x ptr> [[V2:%.*]], <2 x ptr> undef, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEON-NEXT: [[TMP1:%.*]] = ptrtoint <4 x ptr> [[S0]] to <4 x i32>
; CHECK-NEON-NEXT: [[TMP2:%.*]] = ptrtoint <4 x ptr> [[S1]] to <4 x i32>
; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 0, i32 1>
; CHECK-NEON-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 2, i32 3>
; CHECK-NEON-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 4, i32 5>
; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst3.p0.v2i32(ptr [[PTR:%.*]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], i32 4)
; CHECK-NEON-NEXT: ret void
;
; CHECK-MVE-LABEL: @store_ptrvec_factor3(
; CHECK-MVE-NEXT: [[S0:%.*]] = shufflevector <2 x ptr> [[V0:%.*]], <2 x ptr> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <2 x ptr> [[V2:%.*]], <2 x ptr> undef, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x ptr> [[S0]], <4 x ptr> [[S1]], <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
; CHECK-MVE-NEXT: store <6 x ptr> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-MVE-NEXT: ret void
;
; CHECK-NONE-LABEL: @store_ptrvec_factor3(
; CHECK-NONE-NEXT: [[S0:%.*]] = shufflevector <2 x ptr> [[V0:%.*]], <2 x ptr> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <2 x ptr> [[V2:%.*]], <2 x ptr> undef, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x ptr> [[S0]], <4 x ptr> [[S1]], <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
; CHECK-NONE-NEXT: store <6 x ptr> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-NONE-NEXT: ret void
;
  %s0 = shufflevector <2 x ptr> %v0, <2 x ptr> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %s1 = shufflevector <2 x ptr> %v2, <2 x ptr> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %interleaved.vec = shufflevector <4 x ptr> %s0, <4 x ptr> %s1, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
  store <6 x ptr> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; Factor-4 interleaving store of four <2 x ptr> vectors. With NEON the
; expectations show the concatenated values converted with ptrtoint, split
; with shuffles and passed to an i32-typed llvm.arm.neon.vst4; with MVE or no
; vector extension the interleaving shuffle and the store are expected to
; remain.
define void @store_ptrvec_factor4(ptr %ptr, <2 x ptr> %v0, <2 x ptr> %v1, <2 x ptr> %v2, <2 x ptr> %v3) {
; CHECK-NEON-LABEL: @store_ptrvec_factor4(
; CHECK-NEON-NEXT: [[S0:%.*]] = shufflevector <2 x ptr> [[V0:%.*]], <2 x ptr> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEON-NEXT: [[S1:%.*]] = shufflevector <2 x ptr> [[V2:%.*]], <2 x ptr> [[V3:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEON-NEXT: [[TMP1:%.*]] = ptrtoint <4 x ptr> [[S0]] to <4 x i32>
; CHECK-NEON-NEXT: [[TMP2:%.*]] = ptrtoint <4 x ptr> [[S1]] to <4 x i32>
; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 0, i32 1>
; CHECK-NEON-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 2, i32 3>
; CHECK-NEON-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 4, i32 5>
; CHECK-NEON-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <2 x i32> <i32 6, i32 7>
; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst4.p0.v2i32(ptr [[PTR:%.*]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> [[TMP6]], i32 4)
; CHECK-NEON-NEXT: ret void
;
; CHECK-MVE-LABEL: @store_ptrvec_factor4(
; CHECK-MVE-NEXT: [[S0:%.*]] = shufflevector <2 x ptr> [[V0:%.*]], <2 x ptr> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <2 x ptr> [[V2:%.*]], <2 x ptr> [[V3:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x ptr> [[S0]], <4 x ptr> [[S1]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
; CHECK-MVE-NEXT: store <8 x ptr> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-MVE-NEXT: ret void
;
; CHECK-NONE-LABEL: @store_ptrvec_factor4(
; CHECK-NONE-NEXT: [[S0:%.*]] = shufflevector <2 x ptr> [[V0:%.*]], <2 x ptr> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <2 x ptr> [[V2:%.*]], <2 x ptr> [[V3:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x ptr> [[S0]], <4 x ptr> [[S1]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
; CHECK-NONE-NEXT: store <8 x ptr> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-NONE-NEXT: ret void
;
  %s0 = shufflevector <2 x ptr> %v0, <2 x ptr> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %s1 = shufflevector <2 x ptr> %v2, <2 x ptr> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %interleaved.vec = shufflevector <4 x ptr> %s0, <4 x ptr> %s1, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
  store <8 x ptr> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; Factor-2 de-interleaving load of <8 x i32> whose shuffle masks contain undef
; lanes. With NEON the expectations show an llvm.arm.neon.vld2 call and with
; MVE an llvm.arm.mve.vld2q call; with no vector extension the load and the
; shuffles are expected to remain (their undef mask lanes print as poison).
define void @load_undef_mask_factor2(ptr %ptr) {
; CHECK-NEON-LABEL: @load_undef_mask_factor2(
; CHECK-NEON-NEXT: [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0(ptr [[PTR:%.*]], i32 4)
; CHECK-NEON-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 1
; CHECK-NEON-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0
; CHECK-NEON-NEXT: ret void
;
; CHECK-MVE-LABEL: @load_undef_mask_factor2(
; CHECK-MVE-NEXT: [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.p0(ptr [[PTR:%.*]])
; CHECK-MVE-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 1
; CHECK-MVE-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0
; CHECK-MVE-NEXT: ret void
;
; CHECK-NONE-LABEL: @load_undef_mask_factor2(
; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <8 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <8 x i32> [[INTERLEAVED_VEC]], <8 x i32> undef, <4 x i32> <i32 poison, i32 2, i32 poison, i32 6>
; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <8 x i32> [[INTERLEAVED_VEC]], <8 x i32> undef, <4 x i32> <i32 poison, i32 3, i32 poison, i32 7>
; CHECK-NONE-NEXT: ret void
;
  %interleaved.vec = load <8 x i32>, ptr %ptr, align 4
  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 6>
  %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 7>
  ret void
}

; Factor-3 de-interleaving load of <12 x i32> where the third shuffle mask is
; mostly undef. With NEON the expectations show an llvm.arm.neon.vld3 call;
; with MVE or no vector extension the load and the shuffles are expected to
; remain (undef mask lanes print as poison).
define void @load_undef_mask_factor3(ptr %ptr) {
; CHECK-NEON-LABEL: @load_undef_mask_factor3(
; CHECK-NEON-NEXT: [[VLDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32.p0(ptr [[PTR:%.*]], i32 4)
; CHECK-NEON-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 2
; CHECK-NEON-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 1
; CHECK-NEON-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 0
; CHECK-NEON-NEXT: ret void
;
; CHECK-MVE-LABEL: @load_undef_mask_factor3(
; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <12 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
; CHECK-MVE-NEXT: [[V2:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> undef, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
; CHECK-MVE-NEXT: ret void
;
; CHECK-NONE-LABEL: @load_undef_mask_factor3(
; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <12 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
; CHECK-NONE-NEXT: [[V2:%.*]] = shufflevector <12 x i32> [[INTERLEAVED_VEC]], <12 x i32> undef, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
; CHECK-NONE-NEXT: ret void
;
  %interleaved.vec = load <12 x i32>, ptr %ptr, align 4
  %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
  %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
  %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
  ret void
}

; Factor-4 de-interleaving load of <16 x i32>. With NEON the expectations show
; an llvm.arm.neon.vld4 call; with MVE or no vector extension the expectations
; keep the load and four shuffles whose trailing mask lanes are poison.
define void @load_undef_mask_factor4(ptr %ptr) {
; CHECK-NEON-LABEL: @load_undef_mask_factor4(
; CHECK-NEON-NEXT: [[VLDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32.p0(ptr [[PTR:%.*]], i32 4)
; CHECK-NEON-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 3
; CHECK-NEON-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 2
; CHECK-NEON-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 1
; CHECK-NEON-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 0
; CHECK-NEON-NEXT: ret void
;
; CHECK-MVE-LABEL: @load_undef_mask_factor4(
; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <16 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 poison, i32 poison>
; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 poison, i32 poison>
; CHECK-MVE-NEXT: [[V2:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 poison, i32 poison>
; CHECK-MVE-NEXT: [[V3:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 poison, i32 poison>
; CHECK-MVE-NEXT: ret void
;
; CHECK-NONE-LABEL: @load_undef_mask_factor4(
; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <16 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 poison, i32 poison>
; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 poison, i32 poison>
; CHECK-NONE-NEXT: [[V2:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 poison, i32 poison>
; CHECK-NONE-NEXT: [[V3:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 poison, i32 poison>
; CHECK-NONE-NEXT: ret void
;
  %interleaved.vec = load <16 x i32>, ptr %ptr, align 4
  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 0,
i32 4, i32 undef, i32 undef> 447 %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef> 448 %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 undef, i32 undef> 449 %v3 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 undef, i32 undef> 450 ret void 451} 452 453define void @store_undef_mask_factor2(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1) { 454; CHECK-NEON-LABEL: @store_undef_mask_factor2( 455; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 456; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> [[V1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 457; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst2.p0.v4i32(ptr [[PTR:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], i32 4) 458; CHECK-NEON-NEXT: ret void 459; 460; CHECK-MVE-LABEL: @store_undef_mask_factor2( 461; CHECK-MVE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 462; CHECK-MVE-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> [[V1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 463; CHECK-MVE-NEXT: call void @llvm.arm.mve.vst2q.p0.v4i32(ptr [[PTR:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], i32 0) 464; CHECK-MVE-NEXT: call void @llvm.arm.mve.vst2q.p0.v4i32(ptr [[PTR]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], i32 1) 465; CHECK-MVE-NEXT: ret void 466; 467; CHECK-NONE-LABEL: @store_undef_mask_factor2( 468; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 2, i32 6, i32 3, i32 7> 469; CHECK-NONE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 470; CHECK-NONE-NEXT: ret void 471; 472 %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 undef, 
i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7> 473 store <8 x i32> %interleaved.vec, ptr %ptr, align 4 474 ret void 475} 476 477define void @store_undef_mask_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) { 478; CHECK-NEON-LABEL: @store_undef_mask_factor3( 479; CHECK-NEON-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 480; CHECK-NEON-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison> 481; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 482; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 483; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11> 484; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst3.p0.v4i32(ptr [[PTR:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 4) 485; CHECK-NEON-NEXT: ret void 486; 487; CHECK-MVE-LABEL: @store_undef_mask_factor3( 488; CHECK-MVE-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 489; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison> 490; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <12 x i32> <i32 0, i32 4, i32 poison, i32 1, i32 poison, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11> 491; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 492; CHECK-MVE-NEXT: ret void 493; 494; CHECK-NONE-LABEL: @store_undef_mask_factor3( 495; CHECK-NONE-NEXT: [[S0:%.*]] = shufflevector <4 x i32> 
[[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 496; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison> 497; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <12 x i32> <i32 0, i32 4, i32 poison, i32 1, i32 poison, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11> 498; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 499; CHECK-NONE-NEXT: ret void 500; 501 %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 502 %s1 = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> 503 %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 undef, i32 1, i32 undef, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11> 504 store <12 x i32> %interleaved.vec, ptr %ptr, align 4 505 ret void 506} 507 508define void @store_undef_mask_factor4(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) { 509; CHECK-NEON-LABEL: @store_undef_mask_factor4( 510; CHECK-NEON-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 511; CHECK-NEON-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 512; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 513; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 514; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11> 515; CHECK-NEON-NEXT: 
[[TMP4:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15> 516; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst4.p0.v4i32(ptr [[PTR:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], i32 4) 517; CHECK-NEON-NEXT: ret void 518; 519; CHECK-MVE-LABEL: @store_undef_mask_factor4( 520; CHECK-MVE-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 521; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 522; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <16 x i32> <i32 0, i32 4, i32 8, i32 poison, i32 poison, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15> 523; CHECK-MVE-NEXT: store <16 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 524; CHECK-MVE-NEXT: ret void 525; 526; CHECK-NONE-LABEL: @store_undef_mask_factor4( 527; CHECK-NONE-NEXT: [[S0:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 528; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 529; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[S0]], <8 x i32> [[S1]], <16 x i32> <i32 0, i32 4, i32 8, i32 poison, i32 poison, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15> 530; CHECK-NONE-NEXT: store <16 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 531; CHECK-NONE-NEXT: ret void 532; 533 %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 534 %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 
7> 535 %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 undef, i32 undef, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15> 536 store <16 x i32> %interleaved.vec, ptr %ptr, align 4 537 ret void 538} 539 540define void @load_address_space(ptr addrspace(1) %ptr) { 541; CHECK-NEON-LABEL: @load_address_space( 542; CHECK-NEON-NEXT: [[VLDN:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32.p1(ptr addrspace(1) [[PTR:%.*]], i32 32) 543; CHECK-NEON-NEXT: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 2 544; CHECK-NEON-NEXT: [[TMP3:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 1 545; CHECK-NEON-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLDN]], 0 546; CHECK-NEON-NEXT: ret void 547; 548; CHECK-MVE-LABEL: @load_address_space( 549; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <8 x i32>, ptr addrspace(1) [[PTR:%.*]], align 32 550; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <8 x i32> [[INTERLEAVED_VEC]], <8 x i32> undef, <2 x i32> <i32 0, i32 3> 551; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <8 x i32> [[INTERLEAVED_VEC]], <8 x i32> undef, <2 x i32> <i32 1, i32 4> 552; CHECK-MVE-NEXT: [[V2:%.*]] = shufflevector <8 x i32> [[INTERLEAVED_VEC]], <8 x i32> undef, <2 x i32> <i32 2, i32 5> 553; CHECK-MVE-NEXT: ret void 554; 555; CHECK-NONE-LABEL: @load_address_space( 556; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <8 x i32>, ptr addrspace(1) [[PTR:%.*]], align 32 557; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <8 x i32> [[INTERLEAVED_VEC]], <8 x i32> undef, <2 x i32> <i32 0, i32 3> 558; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <8 x i32> [[INTERLEAVED_VEC]], <8 x i32> undef, <2 x i32> <i32 1, i32 4> 559; CHECK-NONE-NEXT: [[V2:%.*]] = shufflevector <8 x i32> [[INTERLEAVED_VEC]], <8 x i32> undef, <2 x i32> <i32 2, i32 5> 560; CHECK-NONE-NEXT: ret void 561; 562 %interleaved.vec = load <8 x i32>, ptr 
addrspace(1) %ptr 563 %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <2 x i32> <i32 0, i32 3> 564 %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <2 x i32> <i32 1, i32 4> 565 %v2 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> undef, <2 x i32> <i32 2, i32 5> 566 ret void 567} 568 569define void @store_address_space(ptr addrspace(1) %ptr, <2 x i32> %v0, <2 x i32> %v1) { 570; CHECK-NEON-LABEL: @store_address_space( 571; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[V0:%.*]], <2 x i32> [[V1:%.*]], <2 x i32> <i32 0, i32 1> 572; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[V0]], <2 x i32> [[V1]], <2 x i32> <i32 2, i32 3> 573; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst2.p1.v2i32(ptr addrspace(1) [[PTR:%.*]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], i32 8) 574; CHECK-NEON-NEXT: ret void 575; 576; CHECK-MVE-LABEL: @store_address_space( 577; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <2 x i32> [[V0:%.*]], <2 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 2, i32 1, i32 3> 578; CHECK-MVE-NEXT: store <4 x i32> [[INTERLEAVED_VEC]], ptr addrspace(1) [[PTR:%.*]], align 8 579; CHECK-MVE-NEXT: ret void 580; 581; CHECK-NONE-LABEL: @store_address_space( 582; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <2 x i32> [[V0:%.*]], <2 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 2, i32 1, i32 3> 583; CHECK-NONE-NEXT: store <4 x i32> [[INTERLEAVED_VEC]], ptr addrspace(1) [[PTR:%.*]], align 8 584; CHECK-NONE-NEXT: ret void 585; 586 %interleaved.vec = shufflevector <2 x i32> %v0, <2 x i32> %v1, <4 x i32> <i32 0, i32 2, i32 1, i32 3> 587 store <4 x i32> %interleaved.vec, ptr addrspace(1) %ptr 588 ret void 589} 590 591define void @load_f16_factor2(ptr %ptr) { 592; CHECK-NEON-LABEL: @load_f16_factor2( 593; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = load <8 x half>, ptr [[PTR:%.*]], align 4 594; CHECK-NEON-NEXT: [[V0:%.*]] = shufflevector <8 x half> [[INTERLEAVED_VEC]], <8 x half> undef, <4 x i32> <i32 0, i32 2, i32 
4, i32 6> 595; CHECK-NEON-NEXT: [[V1:%.*]] = shufflevector <8 x half> [[INTERLEAVED_VEC]], <8 x half> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> 596; CHECK-NEON-NEXT: ret void 597; 598; CHECK-MVE-LABEL: @load_f16_factor2( 599; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <8 x half>, ptr [[PTR:%.*]], align 4 600; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <8 x half> [[INTERLEAVED_VEC]], <8 x half> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> 601; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <8 x half> [[INTERLEAVED_VEC]], <8 x half> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> 602; CHECK-MVE-NEXT: ret void 603; 604; CHECK-NONE-LABEL: @load_f16_factor2( 605; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <8 x half>, ptr [[PTR:%.*]], align 4 606; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <8 x half> [[INTERLEAVED_VEC]], <8 x half> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> 607; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <8 x half> [[INTERLEAVED_VEC]], <8 x half> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> 608; CHECK-NONE-NEXT: ret void 609; 610 %interleaved.vec = load <8 x half>, ptr %ptr, align 4 611 %v0 = shufflevector <8 x half> %interleaved.vec, <8 x half> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> 612 %v1 = shufflevector <8 x half> %interleaved.vec, <8 x half> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> 613 ret void 614} 615 616define void @store_f16_factor2(ptr %ptr, <4 x half> %v0, <4 x half> %v1) { 617; CHECK-NEON-LABEL: @store_f16_factor2( 618; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x half> [[V0:%.*]], <4 x half> [[V1:%.*]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> 619; CHECK-NEON-NEXT: store <8 x half> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 620; CHECK-NEON-NEXT: ret void 621; 622; CHECK-MVE-LABEL: @store_f16_factor2( 623; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x half> [[V0:%.*]], <4 x half> [[V1:%.*]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> 
624; CHECK-MVE-NEXT: store <8 x half> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 625; CHECK-MVE-NEXT: ret void 626; 627; CHECK-NONE-LABEL: @store_f16_factor2( 628; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x half> [[V0:%.*]], <4 x half> [[V1:%.*]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> 629; CHECK-NONE-NEXT: store <8 x half> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 630; CHECK-NONE-NEXT: ret void 631; 632 %interleaved.vec = shufflevector <4 x half> %v0, <4 x half> %v1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> 633 store <8 x half> %interleaved.vec, ptr %ptr, align 4 634 ret void 635} 636 637define void @load_illegal_factor2(ptr %ptr) nounwind { 638; CHECK-NEON-LABEL: @load_illegal_factor2( 639; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = load <3 x float>, ptr [[PTR:%.*]], align 16 640; CHECK-NEON-NEXT: [[V0:%.*]] = shufflevector <3 x float> [[INTERLEAVED_VEC]], <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 poison> 641; CHECK-NEON-NEXT: ret void 642; 643; CHECK-MVE-LABEL: @load_illegal_factor2( 644; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <3 x float>, ptr [[PTR:%.*]], align 16 645; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <3 x float> [[INTERLEAVED_VEC]], <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 poison> 646; CHECK-MVE-NEXT: ret void 647; 648; CHECK-NONE-LABEL: @load_illegal_factor2( 649; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <3 x float>, ptr [[PTR:%.*]], align 16 650; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <3 x float> [[INTERLEAVED_VEC]], <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 poison> 651; CHECK-NONE-NEXT: ret void 652; 653 %interleaved.vec = load <3 x float>, ptr %ptr, align 16 654 %v0 = shufflevector <3 x float> %interleaved.vec, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef> 655 ret void 656} 657 658define void @store_illegal_factor2(ptr %ptr, <3 x float> %v0) nounwind { 659; CHECK-NEON-LABEL: @store_illegal_factor2( 660; CHECK-NEON-NEXT: 
[[INTERLEAVED_VEC:%.*]] = shufflevector <3 x float> [[V0:%.*]], <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 poison> 661; CHECK-NEON-NEXT: store <3 x float> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 16 662; CHECK-NEON-NEXT: ret void 663; 664; CHECK-MVE-LABEL: @store_illegal_factor2( 665; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <3 x float> [[V0:%.*]], <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 poison> 666; CHECK-MVE-NEXT: store <3 x float> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 16 667; CHECK-MVE-NEXT: ret void 668; 669; CHECK-NONE-LABEL: @store_illegal_factor2( 670; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <3 x float> [[V0:%.*]], <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 poison> 671; CHECK-NONE-NEXT: store <3 x float> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 16 672; CHECK-NONE-NEXT: ret void 673; 674 %interleaved.vec = shufflevector <3 x float> %v0, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef> 675 store <3 x float> %interleaved.vec, ptr %ptr, align 16 676 ret void 677} 678 679define void @store_general_mask_factor4(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) { 680; CHECK-NEON-LABEL: @store_general_mask_factor4( 681; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <2 x i32> <i32 4, i32 5> 682; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 16, i32 17> 683; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 32, i32 33> 684; CHECK-NEON-NEXT: [[TMP4:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 8, i32 9> 685; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst4.p0.v2i32(ptr [[PTR:%.*]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], i32 4) 686; CHECK-NEON-NEXT: ret void 687; 688; CHECK-MVE-LABEL: @store_general_mask_factor4( 689; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 
x i32> [[V1:%.*]], <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9> 690; CHECK-MVE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 691; CHECK-MVE-NEXT: ret void 692; 693; CHECK-NONE-LABEL: @store_general_mask_factor4( 694; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9> 695; CHECK-NONE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 696; CHECK-NONE-NEXT: ret void 697; 698 %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9> 699 store <8 x i32> %interleaved.vec, ptr %ptr, align 4 700 ret void 701} 702 703define void @store_general_mask_factor4_undefbeg(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) { 704; CHECK-NEON-LABEL: @store_general_mask_factor4_undefbeg( 705; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <2 x i32> <i32 4, i32 5> 706; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 16, i32 17> 707; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 32, i32 33> 708; CHECK-NEON-NEXT: [[TMP4:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 8, i32 9> 709; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst4.p0.v2i32(ptr [[PTR:%.*]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], i32 4) 710; CHECK-NEON-NEXT: ret void 711; 712; CHECK-MVE-LABEL: @store_general_mask_factor4_undefbeg( 713; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> <i32 poison, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9> 714; CHECK-MVE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 715; CHECK-MVE-NEXT: ret void 716; 717; CHECK-NONE-LABEL: 
@store_general_mask_factor4_undefbeg( 718; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> <i32 poison, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9> 719; CHECK-NONE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 720; CHECK-NONE-NEXT: ret void 721; 722 %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 undef, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 9> 723 store <8 x i32> %interleaved.vec, ptr %ptr, align 4 724 ret void 725} 726 727define void @store_general_mask_factor4_undefend(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) { 728; CHECK-NEON-LABEL: @store_general_mask_factor4_undefend( 729; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <2 x i32> <i32 4, i32 5> 730; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 16, i32 17> 731; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 32, i32 33> 732; CHECK-NEON-NEXT: [[TMP4:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 8, i32 9> 733; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst4.p0.v2i32(ptr [[PTR:%.*]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], i32 4) 734; CHECK-NEON-NEXT: ret void 735; 736; CHECK-MVE-LABEL: @store_general_mask_factor4_undefend( 737; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 poison> 738; CHECK-MVE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 739; CHECK-MVE-NEXT: ret void 740; 741; CHECK-NONE-LABEL: @store_general_mask_factor4_undefend( 742; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 poison> 
743; CHECK-NONE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 744; CHECK-NONE-NEXT: ret void 745; 746 %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 16, i32 32, i32 8, i32 5, i32 17, i32 33, i32 undef> 747 store <8 x i32> %interleaved.vec, ptr %ptr, align 4 748 ret void 749} 750 751define void @store_general_mask_factor4_undefmid(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) { 752; CHECK-NEON-LABEL: @store_general_mask_factor4_undefmid( 753; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <2 x i32> <i32 4, i32 5> 754; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 16, i32 17> 755; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 32, i32 33> 756; CHECK-NEON-NEXT: [[TMP4:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 8, i32 9> 757; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst4.p0.v2i32(ptr [[PTR:%.*]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], i32 4) 758; CHECK-NEON-NEXT: ret void 759; 760; CHECK-MVE-LABEL: @store_general_mask_factor4_undefmid( 761; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> <i32 4, i32 poison, i32 32, i32 8, i32 5, i32 17, i32 poison, i32 9> 762; CHECK-MVE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 763; CHECK-MVE-NEXT: ret void 764; 765; CHECK-NONE-LABEL: @store_general_mask_factor4_undefmid( 766; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> <i32 4, i32 poison, i32 32, i32 8, i32 5, i32 17, i32 poison, i32 9> 767; CHECK-NONE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 768; CHECK-NONE-NEXT: ret void 769; 770 %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 undef, 
i32 32, i32 8, i32 5, i32 17, i32 undef, i32 9> 771 store <8 x i32> %interleaved.vec, ptr %ptr, align 4 772 ret void 773} 774 775define void @store_general_mask_factor4_undefmulti(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) { 776; CHECK-NEON-LABEL: @store_general_mask_factor4_undefmulti( 777; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <2 x i32> <i32 4, i32 5> 778; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 0, i32 1> 779; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 0, i32 1> 780; CHECK-NEON-NEXT: [[TMP4:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <2 x i32> <i32 8, i32 9> 781; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst4.p0.v2i32(ptr [[PTR:%.*]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], i32 4) 782; CHECK-NEON-NEXT: ret void 783; 784; CHECK-MVE-LABEL: @store_general_mask_factor4_undefmulti( 785; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> <i32 4, i32 poison, i32 poison, i32 8, i32 poison, i32 poison, i32 poison, i32 9> 786; CHECK-MVE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 787; CHECK-MVE-NEXT: ret void 788; 789; CHECK-NONE-LABEL: @store_general_mask_factor4_undefmulti( 790; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <8 x i32> <i32 4, i32 poison, i32 poison, i32 8, i32 poison, i32 poison, i32 poison, i32 9> 791; CHECK-NONE-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 792; CHECK-NONE-NEXT: ret void 793; 794 %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <8 x i32> <i32 4, i32 undef, i32 undef, i32 8, i32 undef, i32 undef, i32 undef, i32 9> 795 store <8 x i32> %interleaved.vec, ptr %ptr, align 4 796 ret void 797} 798 799define void @store_general_mask_factor3(ptr 
%ptr, <32 x i32> %v0, <32 x i32> %v1) {
; CHECK-NEON-LABEL: @store_general_mask_factor3(
; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <4 x i32> <i32 32, i32 33, i32 34, i32 35>
; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <4 x i32> <i32 16, i32 17, i32 18, i32 19>
; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst3.p0.v4i32(ptr [[PTR:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 4)
; CHECK-NEON-NEXT: ret void
;
; CHECK-MVE-LABEL: @store_general_mask_factor3(
; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 4, i32 32, i32 16, i32 5, i32 33, i32 17, i32 6, i32 34, i32 18, i32 7, i32 35, i32 19>
; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-MVE-NEXT: ret void
;
; CHECK-NONE-LABEL: @store_general_mask_factor3(
; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 4, i32 32, i32 16, i32 5, i32 33, i32 17, i32 6, i32 34, i32 18, i32 7, i32 35, i32 19>
; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-NONE-NEXT: ret void
;
  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 5, i32 33, i32 17, i32 6, i32 34, i32 18, i32 7, i32 35, i32 19>
  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; Factor-3 interleaved store whose first lane (mask positions 0, 3, 6, 9) is
; <4, undef, undef, 7>: the undef elements sit in the middle of the lane and
; the two defined ends agree with the contiguous run 4..7. The generated checks
; expect NEON to emit vst3 with that lane filled in as <4, 5, 6, 7>, while the
; MVE and no-feature runs keep the original shuffle + store.
define void @store_general_mask_factor3_undefmultimid(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
; CHECK-NEON-LABEL: @store_general_mask_factor3_undefmultimid(
; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <4 x i32> <i32 32, i32 33, i32 34, i32 35>
; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <4 x i32> <i32 16, i32 17, i32 18, i32 19>
; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst3.p0.v4i32(ptr [[PTR:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 4)
; CHECK-NEON-NEXT: ret void
;
; CHECK-MVE-LABEL: @store_general_mask_factor3_undefmultimid(
; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 4, i32 32, i32 16, i32 poison, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 7, i32 35, i32 19>
; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-MVE-NEXT: ret void
;
; CHECK-NONE-LABEL: @store_general_mask_factor3_undefmultimid(
; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 4, i32 32, i32 16, i32 poison, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 7, i32 35, i32 19>
; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-NONE-NEXT: ret void
;
  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 7, i32 35, i32 19>
  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; Negative counterpart of @store_general_mask_factor3_undefmultimid: the first
; lane is <4, undef, undef, 8>. A contiguous run starting at 4 would need 7 in
; the last slot, so the defined elements disagree and the generated checks
; expect the shuffle + store to be left untouched under all three RUN
; configurations (including NEON).
define void @store_general_mask_factor3_undef_fail(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
; CHECK-NEON-LABEL: @store_general_mask_factor3_undef_fail(
; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 4, i32 32, i32 16, i32 poison, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 8, i32 35, i32 19>
; CHECK-NEON-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-NEON-NEXT: ret void
;
; CHECK-MVE-LABEL: @store_general_mask_factor3_undef_fail(
; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 4, i32 32, i32 16, i32 poison, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 8, i32 35, i32 19>
; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-MVE-NEXT: ret void
;
; CHECK-NONE-LABEL: @store_general_mask_factor3_undef_fail(
; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 4, i32 32, i32 16, i32 poison, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 8, i32 35, i32 19>
; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-NONE-NEXT: ret void
;
  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 4, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 8, i32 35, i32 19>
  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; Every element of the first lane (mask positions 0, 3, 6, 9) is undef, so the
; mask gives no start index for it. The generated checks expect NEON to still
; form a vst3, using elements <0, 1, 2, 3> for that operand; MVE and the
; no-feature run keep the shuffle + store.
define void @store_general_mask_factor3_undeflane(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
; CHECK-NEON-LABEL: @store_general_mask_factor3_undeflane(
; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <4 x i32> <i32 32, i32 33, i32 34, i32 35>
; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <4 x i32> <i32 16, i32 17, i32 18, i32 19>
; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst3.p0.v4i32(ptr [[PTR:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 4)
; CHECK-NEON-NEXT: ret void
;
; CHECK-MVE-LABEL: @store_general_mask_factor3_undeflane(
; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 poison, i32 32, i32 16, i32 poison, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 poison, i32 35, i32 19>
; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-MVE-NEXT: ret void
;
; CHECK-NONE-LABEL: @store_general_mask_factor3_undeflane(
; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 poison, i32 32, i32 16, i32 poison, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 poison, i32 35, i32 19>
; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-NONE-NEXT: ret void
;
  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; First lane is <undef, undef, undef, 2>: only the last slot is defined. A
; contiguous 4-element run ending at index 2 would have to start at -1, so the
; generated checks expect no configuration to rewrite the store.
define void @store_general_mask_factor3_endstart_fail(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
; CHECK-NEON-LABEL: @store_general_mask_factor3_endstart_fail(
; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 poison, i32 32, i32 16, i32 poison, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 2, i32 35, i32 19>
; CHECK-NEON-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-NEON-NEXT: ret void
;
; CHECK-MVE-LABEL: @store_general_mask_factor3_endstart_fail(
; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 poison, i32 32, i32 16, i32 poison, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 2, i32 35, i32 19>
; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-MVE-NEXT: ret void
;
; CHECK-NONE-LABEL: @store_general_mask_factor3_endstart_fail(
; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 poison, i32 32, i32 16, i32 poison, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 2, i32 35, i32 19>
; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-NONE-NEXT: ret void
;
  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 2, i32 35, i32 19>
  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; Positive counterpart of @store_general_mask_factor3_endstart_fail: the first
; lane is <undef, undef, undef, 7>, which fits the run 4..7. The generated
; checks expect NEON to emit vst3 with <4, 5, 6, 7> for that operand; MVE and
; the no-feature run keep the shuffle + store.
define void @store_general_mask_factor3_endstart_pass(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
; CHECK-NEON-LABEL: @store_general_mask_factor3_endstart_pass(
; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <4 x i32> <i32 32, i32 33, i32 34, i32 35>
; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <4 x i32> <i32 16, i32 17, i32 18, i32 19>
; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst3.p0.v4i32(ptr [[PTR:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 4)
; CHECK-NEON-NEXT: ret void
;
; CHECK-MVE-LABEL: @store_general_mask_factor3_endstart_pass(
; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 poison, i32 32, i32 16, i32 poison, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 7, i32 35, i32 19>
; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-MVE-NEXT: ret void
;
; CHECK-NONE-LABEL: @store_general_mask_factor3_endstart_pass(
; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 poison, i32 32, i32 16, i32 poison, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 7, i32 35, i32 19>
; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-NONE-NEXT: ret void
;
  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 undef, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 7, i32 35, i32 19>
  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; First lane is <undef, 0, undef, undef>: the only defined element is index 0
; in the second slot, so a contiguous run through it would have to start at
; -1. The generated checks expect no configuration to rewrite the store.
define void @store_general_mask_factor3_midstart_fail(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
; CHECK-NEON-LABEL: @store_general_mask_factor3_midstart_fail(
; CHECK-NEON-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 poison, i32 32, i32 16, i32 0, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 poison, i32 35, i32 19>
; CHECK-NEON-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-NEON-NEXT: ret void
;
; CHECK-MVE-LABEL: @store_general_mask_factor3_midstart_fail(
; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 poison, i32 32, i32 16, i32 0, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 poison, i32 35, i32 19>
; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-MVE-NEXT: ret void
;
; CHECK-NONE-LABEL: @store_general_mask_factor3_midstart_fail(
; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 poison, i32 32, i32 16, i32 0, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 poison, i32 35, i32 19>
; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-NONE-NEXT: ret void
;
  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 0, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; Positive counterpart of @store_general_mask_factor3_midstart_fail: the first
; lane is <undef, 1, undef, undef>, which fits the run 0..3. The generated
; checks expect NEON to emit vst3 with <0, 1, 2, 3> for that operand; MVE and
; the no-feature run keep the shuffle + store.
define void @store_general_mask_factor3_midstart_pass(ptr %ptr, <32 x i32> %v0, <32 x i32> %v1) {
; CHECK-NEON-LABEL: @store_general_mask_factor3_midstart_pass(
; CHECK-NEON-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <4 x i32> <i32 32, i32 33, i32 34, i32 35>
; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> [[V0]], <32 x i32> [[V1]], <4 x i32> <i32 16, i32 17, i32 18, i32 19>
; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst3.p0.v4i32(ptr [[PTR:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 4)
; CHECK-NEON-NEXT: ret void
;
; CHECK-MVE-LABEL: @store_general_mask_factor3_midstart_pass(
; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 poison, i32 32, i32 16, i32 1, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 poison, i32 35, i32 19>
; CHECK-MVE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-MVE-NEXT: ret void
;
; CHECK-NONE-LABEL: @store_general_mask_factor3_midstart_pass(
; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[V0:%.*]], <32 x i32> [[V1:%.*]], <12 x i32> <i32 poison, i32 32, i32 16, i32 1, i32 33, i32 17, i32 poison, i32 34, i32 18, i32 poison, i32 35, i32 19>
; CHECK-NONE-NEXT: store <12 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-NONE-NEXT: ret void
;
  %interleaved.vec = shufflevector <32 x i32> %v0, <32 x i32> %v1, <12 x i32> <i32 undef, i32 32, i32 16, i32 1, i32 33, i32 17, i32 undef, i32 34, i32 18, i32 undef, i32 35, i32 19>
  store <12 x i32> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; Store destination used by @no_interleave.
@g = external global <4 x float>

; The following does not give a valid interleaved store: the mask
; <0, 7, 1, undef> over two copies of %a0 is an ordinary permutation, not an
; interleave of contiguous sub-vectors, so the generated checks expect the
; shuffle + store to be left unchanged under every RUN configuration.
define void @no_interleave(<4 x float> %a0) {
; CHECK-NEON-LABEL:
@no_interleave( 982; CHECK-NEON-NEXT: [[V0:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A0]], <4 x i32> <i32 0, i32 7, i32 1, i32 poison> 983; CHECK-NEON-NEXT: store <4 x float> [[V0]], ptr @g, align 16 984; CHECK-NEON-NEXT: ret void 985; 986; CHECK-MVE-LABEL: @no_interleave( 987; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A0]], <4 x i32> <i32 0, i32 7, i32 1, i32 poison> 988; CHECK-MVE-NEXT: store <4 x float> [[V0]], ptr @g, align 16 989; CHECK-MVE-NEXT: ret void 990; 991; CHECK-NONE-LABEL: @no_interleave( 992; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A0]], <4 x i32> <i32 0, i32 7, i32 1, i32 poison> 993; CHECK-NONE-NEXT: store <4 x float> [[V0]], ptr @g, align 16 994; CHECK-NONE-NEXT: ret void 995; 996 %v0 = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> <i32 0, i32 7, i32 1, i32 undef> 997 store <4 x float> %v0, ptr @g, align 16 998 ret void 999} 1000 1001define void @load_factor2_wide2(ptr %ptr) { 1002; CHECK-NEON-LABEL: @load_factor2_wide2( 1003; CHECK-NEON-NEXT: [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0(ptr [[PTR:%.*]], i32 4) 1004; CHECK-NEON-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 1 1005; CHECK-NEON-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0 1006; CHECK-NEON-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[PTR]], i32 8 1007; CHECK-NEON-NEXT: [[VLDN1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0(ptr [[TMP5]], i32 4) 1008; CHECK-NEON-NEXT: [[TMP7:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 1 1009; CHECK-NEON-NEXT: [[TMP8:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 0 1010; CHECK-NEON-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1011; CHECK-NEON-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP8]], <8 x i32> 
<i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1012; CHECK-NEON-NEXT: ret void 1013; 1014; CHECK-MVE-LABEL: @load_factor2_wide2( 1015; CHECK-MVE-NEXT: [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.p0(ptr [[PTR:%.*]]) 1016; CHECK-MVE-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 1 1017; CHECK-MVE-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0 1018; CHECK-MVE-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[PTR]], i32 8 1019; CHECK-MVE-NEXT: [[VLDN1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.p0(ptr [[TMP4]]) 1020; CHECK-MVE-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 1 1021; CHECK-MVE-NEXT: [[TMP6:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 0 1022; CHECK-MVE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1023; CHECK-MVE-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1024; CHECK-MVE-NEXT: ret void 1025; 1026; CHECK-NONE-LABEL: @load_factor2_wide2( 1027; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <16 x i32>, ptr [[PTR:%.*]], align 4 1028; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 1029; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <16 x i32> [[INTERLEAVED_VEC]], <16 x i32> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 1030; CHECK-NONE-NEXT: ret void 1031; 1032 %interleaved.vec = load <16 x i32>, ptr %ptr, align 4 1033 %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 1034 %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 
1035 ret void 1036} 1037 1038define void @load_factor2_wide3(ptr %ptr) { 1039; CHECK-NEON-LABEL: @load_factor2_wide3( 1040; CHECK-NEON-NEXT: [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0(ptr [[PTR:%.*]], i32 4) 1041; CHECK-NEON-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 1 1042; CHECK-NEON-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0 1043; CHECK-NEON-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[PTR]], i32 8 1044; CHECK-NEON-NEXT: [[VLDN1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0(ptr [[TMP5]], i32 4) 1045; CHECK-NEON-NEXT: [[TMP7:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 1 1046; CHECK-NEON-NEXT: [[TMP8:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 0 1047; CHECK-NEON-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[TMP5]], i32 8 1048; CHECK-NEON-NEXT: [[VLDN2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0(ptr [[TMP9]], i32 4) 1049; CHECK-NEON-NEXT: [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN2]], 1 1050; CHECK-NEON-NEXT: [[TMP12:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN2]], 0 1051; CHECK-NEON-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1052; CHECK-NEON-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison> 1053; CHECK-NEON-NEXT: [[TMP15:%.*]] = shufflevector <8 x i32> [[TMP13]], <8 x i32> [[TMP14]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 1054; CHECK-NEON-NEXT: [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1055; CHECK-NEON-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, 
i32 poison, i32 poison, i32 poison> 1056; CHECK-NEON-NEXT: [[TMP18:%.*]] = shufflevector <8 x i32> [[TMP16]], <8 x i32> [[TMP17]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 1057; CHECK-NEON-NEXT: ret void 1058; 1059; CHECK-MVE-LABEL: @load_factor2_wide3( 1060; CHECK-MVE-NEXT: [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.p0(ptr [[PTR:%.*]]) 1061; CHECK-MVE-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 1 1062; CHECK-MVE-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0 1063; CHECK-MVE-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[PTR]], i32 8 1064; CHECK-MVE-NEXT: [[VLDN1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.p0(ptr [[TMP4]]) 1065; CHECK-MVE-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 1 1066; CHECK-MVE-NEXT: [[TMP6:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 0 1067; CHECK-MVE-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP4]], i32 8 1068; CHECK-MVE-NEXT: [[VLDN2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.p0(ptr [[TMP7]]) 1069; CHECK-MVE-NEXT: [[TMP8:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN2]], 1 1070; CHECK-MVE-NEXT: [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN2]], 0 1071; CHECK-MVE-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1072; CHECK-MVE-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison> 1073; CHECK-MVE-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> [[TMP11]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 1074; CHECK-MVE-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 
6, i32 7> 1075; CHECK-MVE-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison> 1076; CHECK-MVE-NEXT: [[TMP15:%.*]] = shufflevector <8 x i32> [[TMP13]], <8 x i32> [[TMP14]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> 1077; CHECK-MVE-NEXT: ret void 1078; 1079; CHECK-NONE-LABEL: @load_factor2_wide3( 1080; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <24 x i32>, ptr [[PTR:%.*]], align 4 1081; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <24 x i32> [[INTERLEAVED_VEC]], <24 x i32> undef, <12 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22> 1082; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <24 x i32> [[INTERLEAVED_VEC]], <24 x i32> undef, <12 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23> 1083; CHECK-NONE-NEXT: ret void 1084; 1085 %interleaved.vec = load <24 x i32>, ptr %ptr, align 4 1086 %v0 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> undef, <12 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22> 1087 %v1 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> undef, <12 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23> 1088 ret void 1089} 1090 1091define void @load_factor3_wide(ptr %ptr) { 1092; CHECK-NEON-LABEL: @load_factor3_wide( 1093; CHECK-NEON-NEXT: [[VLDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32.p0(ptr [[PTR:%.*]], i32 4) 1094; CHECK-NEON-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 2 1095; CHECK-NEON-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 1 1096; CHECK-NEON-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 0 1097; CHECK-NEON-NEXT: [[TMP6:%.*]] = 
getelementptr i32, ptr [[PTR]], i32 12 1098; CHECK-NEON-NEXT: [[VLDN1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32.p0(ptr [[TMP6]], i32 4) 1099; CHECK-NEON-NEXT: [[TMP8:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN1]], 2 1100; CHECK-NEON-NEXT: [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN1]], 1 1101; CHECK-NEON-NEXT: [[TMP10:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN1]], 0 1102; CHECK-NEON-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1103; CHECK-NEON-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1104; CHECK-NEON-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1105; CHECK-NEON-NEXT: ret void 1106; 1107; CHECK-MVE-LABEL: @load_factor3_wide( 1108; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <24 x i32>, ptr [[PTR:%.*]], align 4 1109; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <24 x i32> [[INTERLEAVED_VEC]], <24 x i32> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21> 1110; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <24 x i32> [[INTERLEAVED_VEC]], <24 x i32> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22> 1111; CHECK-MVE-NEXT: [[V2:%.*]] = shufflevector <24 x i32> [[INTERLEAVED_VEC]], <24 x i32> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23> 1112; CHECK-MVE-NEXT: ret void 1113; 1114; CHECK-NONE-LABEL: @load_factor3_wide( 1115; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <24 x i32>, ptr [[PTR:%.*]], align 4 1116; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <24 x i32> [[INTERLEAVED_VEC]], <24 x i32> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21> 1117; CHECK-NONE-NEXT: 
[[V1:%.*]] = shufflevector <24 x i32> [[INTERLEAVED_VEC]], <24 x i32> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22> 1118; CHECK-NONE-NEXT: [[V2:%.*]] = shufflevector <24 x i32> [[INTERLEAVED_VEC]], <24 x i32> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23> 1119; CHECK-NONE-NEXT: ret void 1120; 1121 %interleaved.vec = load <24 x i32>, ptr %ptr, align 4 1122 %v0 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21> 1123 %v1 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22> 1124 %v2 = shufflevector <24 x i32> %interleaved.vec, <24 x i32> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23> 1125 ret void 1126} 1127 1128define void @load_factor4_wide(ptr %ptr) { 1129; CHECK-NEON-LABEL: @load_factor4_wide( 1130; CHECK-NEON-NEXT: [[VLDN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32.p0(ptr [[PTR:%.*]], i32 4) 1131; CHECK-NEON-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 3 1132; CHECK-NEON-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 2 1133; CHECK-NEON-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 1 1134; CHECK-NEON-NEXT: [[TMP6:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN]], 0 1135; CHECK-NEON-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[PTR]], i32 16 1136; CHECK-NEON-NEXT: [[VLDN1:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32.p0(ptr [[TMP7]], i32 4) 1137; CHECK-NEON-NEXT: [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN1]], 3 1138; CHECK-NEON-NEXT: [[TMP10:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN1]], 2 1139; 
CHECK-NEON-NEXT: [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN1]], 1 1140; CHECK-NEON-NEXT: [[TMP12:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLDN1]], 0 1141; CHECK-NEON-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1142; CHECK-NEON-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1143; CHECK-NEON-NEXT: [[TMP15:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1144; CHECK-NEON-NEXT: [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1145; CHECK-NEON-NEXT: ret void 1146; 1147; CHECK-MVE-LABEL: @load_factor4_wide( 1148; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <32 x i32>, ptr [[PTR:%.*]], align 4 1149; CHECK-MVE-NEXT: [[V0:%.*]] = shufflevector <32 x i32> [[INTERLEAVED_VEC]], <32 x i32> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28> 1150; CHECK-MVE-NEXT: [[V1:%.*]] = shufflevector <32 x i32> [[INTERLEAVED_VEC]], <32 x i32> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29> 1151; CHECK-MVE-NEXT: [[V2:%.*]] = shufflevector <32 x i32> [[INTERLEAVED_VEC]], <32 x i32> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30> 1152; CHECK-MVE-NEXT: [[V3:%.*]] = shufflevector <32 x i32> [[INTERLEAVED_VEC]], <32 x i32> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31> 1153; CHECK-MVE-NEXT: ret void 1154; 1155; CHECK-NONE-LABEL: @load_factor4_wide( 1156; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = load <32 x i32>, ptr [[PTR:%.*]], align 4 1157; CHECK-NONE-NEXT: [[V0:%.*]] = shufflevector <32 x i32> [[INTERLEAVED_VEC]], <32 x i32> undef, <8 x i32> 
<i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28> 1158; CHECK-NONE-NEXT: [[V1:%.*]] = shufflevector <32 x i32> [[INTERLEAVED_VEC]], <32 x i32> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29> 1159; CHECK-NONE-NEXT: [[V2:%.*]] = shufflevector <32 x i32> [[INTERLEAVED_VEC]], <32 x i32> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30> 1160; CHECK-NONE-NEXT: [[V3:%.*]] = shufflevector <32 x i32> [[INTERLEAVED_VEC]], <32 x i32> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31> 1161; CHECK-NONE-NEXT: ret void 1162; 1163 %interleaved.vec = load <32 x i32>, ptr %ptr, align 4 1164 %v0 = shufflevector <32 x i32> %interleaved.vec, <32 x i32> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28> 1165 %v1 = shufflevector <32 x i32> %interleaved.vec, <32 x i32> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29> 1166 %v2 = shufflevector <32 x i32> %interleaved.vec, <32 x i32> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30> 1167 %v3 = shufflevector <32 x i32> %interleaved.vec, <32 x i32> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31> 1168 ret void 1169} 1170 1171define void @store_factor2_wide(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1) { 1172; CHECK-NEON-LABEL: @store_factor2_wide( 1173; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1174; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[V0]], <8 x i32> [[V1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11> 1175; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst2.p0.v4i32(ptr [[PTR:%.*]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 4) 1176; CHECK-NEON-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[PTR]], i32 8 1177; CHECK-NEON-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[V0]], <8 x i32> [[V1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 
1178; CHECK-NEON-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[V0]], <8 x i32> [[V1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15> 1179; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst2.p0.v4i32(ptr [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], i32 4) 1180; CHECK-NEON-NEXT: ret void 1181; 1182; CHECK-MVE-LABEL: @store_factor2_wide( 1183; CHECK-MVE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1184; CHECK-MVE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[V0]], <8 x i32> [[V1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11> 1185; CHECK-MVE-NEXT: call void @llvm.arm.mve.vst2q.p0.v4i32(ptr [[PTR:%.*]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 0) 1186; CHECK-MVE-NEXT: call void @llvm.arm.mve.vst2q.p0.v4i32(ptr [[PTR]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i32 1) 1187; CHECK-MVE-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[PTR]], i32 8 1188; CHECK-MVE-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[V0]], <8 x i32> [[V1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1189; CHECK-MVE-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[V0]], <8 x i32> [[V1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15> 1190; CHECK-MVE-NEXT: call void @llvm.arm.mve.vst2q.p0.v4i32(ptr [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> [[TMP6]], i32 0) 1191; CHECK-MVE-NEXT: call void @llvm.arm.mve.vst2q.p0.v4i32(ptr [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> [[TMP6]], i32 1) 1192; CHECK-MVE-NEXT: ret void 1193; 1194; CHECK-NONE-LABEL: @store_factor2_wide( 1195; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> 1196; CHECK-NONE-NEXT: store <16 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 1197; CHECK-NONE-NEXT: ret void 1198; 1199 %interleaved.vec = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, 
i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> 1200 store <16 x i32> %interleaved.vec, ptr %ptr, align 4 1201 ret void 1202} 1203 1204define void @store_factor3_wide(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1, <8 x i32> %v2) { 1205; CHECK-NEON-LABEL: @store_factor3_wide( 1206; CHECK-NEON-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1207; CHECK-NEON-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 1208; CHECK-NEON-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1209; CHECK-NEON-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11> 1210; CHECK-NEON-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 16, i32 17, i32 18, i32 19> 1211; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst3.p0.v4i32(ptr [[PTR:%.*]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], i32 4) 1212; CHECK-NEON-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[PTR]], i32 12 1213; CHECK-NEON-NEXT: [[TMP7:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1214; CHECK-NEON-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15> 1215; CHECK-NEON-NEXT: [[TMP9:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 20, i32 21, i32 22, i32 23> 1216; CHECK-NEON-NEXT: call void @llvm.arm.neon.vst3.p0.v4i32(ptr [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 4) 1217; CHECK-NEON-NEXT: ret void 1218; 1219; CHECK-MVE-LABEL: @store_factor3_wide( 1220; 
CHECK-MVE-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1221; CHECK-MVE-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 1222; CHECK-MVE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23> 1223; CHECK-MVE-NEXT: store <24 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 1224; CHECK-MVE-NEXT: ret void 1225; 1226; CHECK-NONE-LABEL: @store_factor3_wide( 1227; CHECK-NONE-NEXT: [[S0:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1228; CHECK-NONE-NEXT: [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 1229; CHECK-NONE-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23> 1230; CHECK-NONE-NEXT: store <24 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4 1231; CHECK-NONE-NEXT: ret void 1232; 1233 %s0 = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 
14, i32 15>
  %s1 = shufflevector <8 x i32> %v2, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %interleaved.vec = shufflevector <16 x i32> %s0, <16 x i32> %s1, <24 x i32> <i32 0, i32 8, i32 16, i32 1, i32 9, i32 17, i32 2, i32 10, i32 18, i32 3, i32 11, i32 19, i32 4, i32 12, i32 20, i32 5, i32 13, i32 21, i32 6, i32 14, i32 22, i32 7, i32 15, i32 23>
  store <24 x i32> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; Factor-4 interleaved store of four <8 x i32> vectors (a <32 x i32> store).
; With +neon the expected output splits the store into two vst4.v4i32 calls,
; the second one addressed 16 i32 elements past %ptr. The +mve.fp and
; no-feature runs expect the IR to be left unchanged.
define void @store_factor4_wide(ptr %ptr, <8 x i32> %v0, <8 x i32> %v1, <8 x i32> %v2, <8 x i32> %v3) {
; CHECK-NEON-LABEL: @store_factor4_wide(
; CHECK-NEON-NEXT:    [[S0:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEON-NEXT:    [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> [[V3:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEON-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEON-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
; CHECK-NEON-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 16, i32 17, i32 18, i32 19>
; CHECK-NEON-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 24, i32 25, i32 26, i32 27>
; CHECK-NEON-NEXT:    call void @llvm.arm.neon.vst4.p0.v4i32(ptr [[PTR:%.*]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], i32 4)
; CHECK-NEON-NEXT:    [[TMP7:%.*]] = getelementptr i32, ptr [[PTR]], i32 16
; CHECK-NEON-NEXT:    [[TMP8:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEON-NEXT:    [[TMP9:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
; CHECK-NEON-NEXT:    [[TMP10:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 20, i32 21, i32 22, i32 23>
; CHECK-NEON-NEXT:    [[TMP11:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <4 x i32> <i32 28, i32 29, i32 30, i32 31>
; CHECK-NEON-NEXT:    call void @llvm.arm.neon.vst4.p0.v4i32(ptr [[TMP7]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i32 4)
; CHECK-NEON-NEXT:    ret void
;
; CHECK-MVE-LABEL: @store_factor4_wide(
; CHECK-MVE-NEXT:    [[S0:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-MVE-NEXT:    [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> [[V3:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
; CHECK-MVE-NEXT:    store <32 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-MVE-NEXT:    ret void
;
; CHECK-NONE-LABEL: @store_factor4_wide(
; CHECK-NONE-NEXT:    [[S0:%.*]] = shufflevector <8 x i32> [[V0:%.*]], <8 x i32> [[V1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NONE-NEXT:    [[S1:%.*]] = shufflevector <8 x i32> [[V2:%.*]], <8 x i32> [[V3:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i32> [[S0]], <16 x i32> [[S1]], <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
; CHECK-NONE-NEXT:    store <32 x i32> [[INTERLEAVED_VEC]], ptr [[PTR:%.*]], align 4
; CHECK-NONE-NEXT:    ret void
;
  %s0 = shufflevector <8 x i32> %v0, <8 x i32> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %s1 = shufflevector <8 x i32> %v2, <8 x i32> %v3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %interleaved.vec = shufflevector <16 x i32> %s0, <16 x i32> %s1, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
  store <32 x i32> %interleaved.vec, ptr %ptr, align 4
  ret void
}

; Factor-2 de-interleaving load of <4 x fp128>. All three configurations
; expect the load and both shuffles to be left unchanged.
define void @load_factor2_fp128(ptr %ptr) {
; CHECK-NEON-LABEL: @load_factor2_fp128(
; CHECK-NEON-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <4 x fp128>, ptr [[PTR:%.*]], align 16
; CHECK-NEON-NEXT:    [[V0:%.*]] = shufflevector <4 x fp128> [[INTERLEAVED_VEC]], <4 x fp128> undef, <2 x i32> <i32 0, i32 2>
; CHECK-NEON-NEXT:    [[V1:%.*]] = shufflevector <4 x fp128> [[INTERLEAVED_VEC]], <4 x fp128> undef, <2 x i32> <i32 1, i32 3>
; CHECK-NEON-NEXT:    ret void
;
; CHECK-MVE-LABEL: @load_factor2_fp128(
; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <4 x fp128>, ptr [[PTR:%.*]], align 16
; CHECK-MVE-NEXT:    [[V0:%.*]] = shufflevector <4 x fp128> [[INTERLEAVED_VEC]], <4 x fp128> undef, <2 x i32> <i32 0, i32 2>
; CHECK-MVE-NEXT:    [[V1:%.*]] = shufflevector <4 x fp128> [[INTERLEAVED_VEC]], <4 x fp128> undef, <2 x i32> <i32 1, i32 3>
; CHECK-MVE-NEXT:    ret void
;
; CHECK-NONE-LABEL: @load_factor2_fp128(
; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <4 x fp128>, ptr [[PTR:%.*]], align 16
; CHECK-NONE-NEXT:    [[V0:%.*]] = shufflevector <4 x fp128> [[INTERLEAVED_VEC]], <4 x fp128> undef, <2 x i32> <i32 0, i32 2>
; CHECK-NONE-NEXT:    [[V1:%.*]] = shufflevector <4 x fp128> [[INTERLEAVED_VEC]], <4 x fp128> undef, <2 x i32> <i32 1, i32 3>
; CHECK-NONE-NEXT:    ret void
;
  %interleaved.vec = load <4 x fp128>, ptr %ptr, align 16
  %v0 = shufflevector <4 x fp128> %interleaved.vec, <4 x fp128> undef, <2 x i32> <i32 0, i32 2>
  %v1 = shufflevector <4 x fp128> %interleaved.vec, <4 x fp128> undef, <2 x i32> <i32 1, i32 3>
  ret void
}

; Factor-2 de-interleaving load of <16 x ptr> (pointers are 32 bits wide per
; the datalayout). With +neon and +mve.fp the expected output is two vld2-style
; intrinsic calls on <4 x i32>, the second one addressed 8 i32 elements past
; %ptr; their results are converted with inttoptr and concatenated back into
; <8 x ptr> vectors. The no-feature run expects the IR to be left unchanged.
define void @load_factor2_wide_pointer(ptr %ptr) {
; CHECK-NEON-LABEL: @load_factor2_wide_pointer(
; CHECK-NEON-NEXT:    [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0(ptr [[PTR:%.*]], i32 4)
; CHECK-NEON-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 1
; CHECK-NEON-NEXT:    [[TMP4:%.*]] = inttoptr <4 x i32> [[TMP3]] to <4 x ptr>
; CHECK-NEON-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0
; CHECK-NEON-NEXT:    [[TMP6:%.*]] = inttoptr <4 x i32> [[TMP5]] to <4 x ptr>
; CHECK-NEON-NEXT:    [[TMP7:%.*]] = getelementptr i32, ptr [[PTR]], i32 8
; CHECK-NEON-NEXT:    [[VLDN1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0(ptr [[TMP7]], i32 4)
; CHECK-NEON-NEXT:    [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 1
; CHECK-NEON-NEXT:    [[TMP10:%.*]] = inttoptr <4 x i32> [[TMP9]] to <4 x ptr>
; CHECK-NEON-NEXT:    [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 0
; CHECK-NEON-NEXT:    [[TMP12:%.*]] = inttoptr <4 x i32> [[TMP11]] to <4 x ptr>
; CHECK-NEON-NEXT:    [[TMP13:%.*]] = shufflevector <4 x ptr> [[TMP4]], <4 x ptr> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEON-NEXT:    [[TMP14:%.*]] = shufflevector <4 x ptr> [[TMP6]], <4 x ptr> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEON-NEXT:    ret void
;
; CHECK-MVE-LABEL: @load_factor2_wide_pointer(
; CHECK-MVE-NEXT:    [[VLDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.p0(ptr [[PTR:%.*]])
; CHECK-MVE-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 1
; CHECK-MVE-NEXT:    [[TMP3:%.*]] = inttoptr <4 x i32> [[TMP2]] to <4 x ptr>
; CHECK-MVE-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN]], 0
; CHECK-MVE-NEXT:    [[TMP5:%.*]] = inttoptr <4 x i32> [[TMP4]] to <4 x ptr>
; CHECK-MVE-NEXT:    [[TMP6:%.*]] = getelementptr i32, ptr [[PTR]], i32 8
; CHECK-MVE-NEXT:    [[VLDN1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.p0(ptr [[TMP6]])
; CHECK-MVE-NEXT:    [[TMP7:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 1
; CHECK-MVE-NEXT:    [[TMP8:%.*]] = inttoptr <4 x i32> [[TMP7]] to <4 x ptr>
; CHECK-MVE-NEXT:    [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[VLDN1]], 0
; CHECK-MVE-NEXT:    [[TMP10:%.*]] = inttoptr <4 x i32> [[TMP9]] to <4 x ptr>
; CHECK-MVE-NEXT:    [[TMP11:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-MVE-NEXT:    [[TMP12:%.*]] = shufflevector <4 x ptr> [[TMP5]], <4 x ptr> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-MVE-NEXT:    ret void
;
; CHECK-NONE-LABEL: @load_factor2_wide_pointer(
; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <16 x ptr>, ptr [[PTR:%.*]], align 4
; CHECK-NONE-NEXT:    [[V0:%.*]] = shufflevector <16 x ptr> [[INTERLEAVED_VEC]], <16 x ptr> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
; CHECK-NONE-NEXT:    [[V1:%.*]] = shufflevector <16 x ptr> [[INTERLEAVED_VEC]], <16 x ptr> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
; CHECK-NONE-NEXT:    ret void
;
  %interleaved.vec = load <16 x ptr>, ptr %ptr, align 4
  %v0 = shufflevector <16 x ptr> %interleaved.vec, <16 x ptr> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %v1 = shufflevector <16 x ptr> %interleaved.vec, <16 x ptr> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  ret void
}

; This would be a candidate for interleaving, except that the load doesn't
; actually load enough elements to satisfy the shuffle masks. (It would be
; possible to produce a vld2.v2i32, but that currently isn't implemented.)
; All three configurations expect the IR to be left unchanged; the mask
; elements written as `i32 undef` in the input appear as `i32 poison` in the
; expected output.
define void @load_out_of_range(ptr %ptr) {
; CHECK-NEON-LABEL: @load_out_of_range(
; CHECK-NEON-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-NEON-NEXT:    [[V0:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 poison, i32 poison>
; CHECK-NEON-NEXT:    [[V1:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 poison, i32 poison>
; CHECK-NEON-NEXT:    ret void
;
; CHECK-MVE-LABEL: @load_out_of_range(
; CHECK-MVE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-MVE-NEXT:    [[V0:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 poison, i32 poison>
; CHECK-MVE-NEXT:    [[V1:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 poison, i32 poison>
; CHECK-MVE-NEXT:    ret void
;
; CHECK-NONE-LABEL: @load_out_of_range(
; CHECK-NONE-NEXT:    [[INTERLEAVED_VEC:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
; CHECK-NONE-NEXT:    [[V0:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 poison, i32 poison>
; CHECK-NONE-NEXT:    [[V1:%.*]] = shufflevector <4 x i32> [[INTERLEAVED_VEC]], <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 poison, i32 poison>
; CHECK-NONE-NEXT:    ret void
;
  %interleaved.vec = load <4 x i32>, ptr %ptr, align 4
  %v0 = shufflevector <4 x i32> %interleaved.vec, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
  %v1 = shufflevector <4 x i32> %interleaved.vec, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  ret void
}
