; RUN: llc < %s | FileCheck --check-prefix AS %s
; RUN: opt -S -interleaved-load-combine < %s | FileCheck %s
; RUN: opt -S -passes=interleaved-load-combine < %s | FileCheck %s

; Regression test for the interleaved-load-combine pass on AArch64.
;
; Each function loads several <4 x float> vectors from neighbouring addresses
; and de-interleaves them with shufflevector. The two opt invocations verify
; the resulting IR (one wide load followed by strided shuffles, or the
; untouched input for the negative tests); the llc invocation verifies which
; ldN instruction, if any, shows up in the generated assembly.

; ModuleID = 'aarch64_interleaved-ld-combine.bc'
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "arm64--linux-gnu"

; This should be lowered into LD4
; Four loads at constant element offsets 2, 3, 4 and 5 from %ptr.
define void @aarch64_ilc_const(ptr %ptr) {
entry:

;;; Check LLVM transformation
; CHECK-LABEL: @aarch64_ilc_const(
; CHECK-DAG: [[GEP:%.+]] = getelementptr inbounds <4 x float>, ptr %ptr, i64 2
; CHECK-DAG: [[LOAD:%.+]] = load <16 x float>, ptr [[GEP]], align 16
; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
; CHECK: ret void

;;; Check if it gets lowered
; AS-LABEL: aarch64_ilc_const
; AS: ld4
; AS: ret

  %gep1 = getelementptr inbounds <4 x float>, ptr %ptr, i64 2
  %gep2 = getelementptr inbounds <4 x float>, ptr %ptr, i64 3
  %gep3 = getelementptr inbounds <4 x float>, ptr %ptr, i64 4
  %gep4 = getelementptr inbounds <4 x float>, ptr %ptr, i64 5
  %ld1 = load <4 x float>, ptr %gep1, align 16
  %ld2 = load <4 x float>, ptr %gep2, align 16
  %ld3 = load <4 x float>, ptr %gep3, align 16
  %ld4 = load <4 x float>, ptr %gep4, align 16
  ; Two shuffle stages: together they pick every fourth element of the
  ; sixteen loaded floats, starting at offsets 0, 1, 2 and 3.
  %sv1 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %sv2 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  %sv3 = shufflevector <4 x float> %ld3, <4 x float> %ld4, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %sv4 = shufflevector <4 x float> %ld3, <4 x float> %ld4, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  %m0_3 = shufflevector <4 x float> %sv1, <4 x float> %sv3, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %m4_7 = shufflevector <4 x float> %sv1, <4 x float> %sv3, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %m8_11 = shufflevector <4 x float> %sv2, <4 x float> %sv4, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %m12_15 = shufflevector <4 x float> %sv2, <4 x float> %sv4, <4 x i32> <i32 1, i32 3, i32 5, i32 7>

  store <4 x float> %m0_3, ptr %gep1, align 16
  store <4 x float> %m4_7, ptr %gep2, align 16
  store <4 x float> %m8_11, ptr %gep3, align 16
  store <4 x float> %m12_15, ptr %gep4, align 16
  ret void
}

; This should be lowered into LD4
; Same pattern as above, but the element index is computed from %idx as
; (%idx + 16/20/24/28) >> 2.
define void @aarch64_ilc_idx(ptr %ptr, i64 %idx) {
entry:

;;; Check LLVM transformation
; CHECK-LABEL: @aarch64_ilc_idx(
; CHECK-DAG: [[ADD:%.+]] = add i64 %idx, 16
; CHECK-DAG: [[LSHR:%.+]] = lshr i64 [[ADD]], 2
; CHECK-DAG: [[GEP:%.+]] = getelementptr inbounds <4 x float>, ptr %ptr, i64 [[LSHR]]
; CHECK-DAG: [[LOAD:%.+]] = load <16 x float>, ptr [[GEP]], align 16
; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
; CHECK: ret void

; AS-LABEL: aarch64_ilc_idx
; AS-DAG: lsl [[LSL:x[0-9]+]], x1, #2
; AS-DAG: add [[ADD:x[0-9]+]], [[LSL]], #64
; AS-DAG: and [[AND:x[0-9]+]], [[ADD]], #0xfffffffffffffff0
; AS-DAG: add [[ADR:x[0-9]+]], x0, [[AND]]
; AS-DAG: ld4 { v[[V0:[0-9]+]].4s, v[[V1:[0-9]+]].4s, v[[V2:[0-9]+]].4s, v[[V3:[0-9]+]].4s }, [[[ADR]]]
; AS-DAG: str q[[V0]]
; AS-DAG: str q[[V1]]
; AS-DAG: str q[[V2]]
; AS-DAG: str q[[V3]]
; AS: ret

  ; The index and address computations below are not listed in ascending
  ; order (presumably on purpose, so the pass cannot rely on program order).
  %a2 = add i64 %idx, 20
  %idx2 = lshr i64 %a2, 2
  %a3 = add i64 %idx, 24
  %a1 = add i64 %idx, 16
  %idx1 = lshr i64 %a1, 2
  %idx3 = lshr i64 %a3, 2
  %a4 = add i64 %idx, 28
  %idx4 = lshr i64 %a4, 2

  %gep2 = getelementptr inbounds <4 x float>, ptr %ptr, i64 %idx2
  %gep4 = getelementptr inbounds <4 x float>, ptr %ptr, i64 %idx4
  %gep1 = getelementptr inbounds <4 x float>, ptr %ptr, i64 %idx1
  %gep3 = getelementptr inbounds <4 x float>, ptr %ptr, i64 %idx3
  %ld1 = load <4 x float>, ptr %gep1, align 16
  %ld2 = load <4 x float>, ptr %gep2, align 16
  %ld3 = load <4 x float>, ptr %gep3, align 16
  %ld4 = load <4 x float>, ptr %gep4, align 16
  %sv1 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %sv2 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  %sv3 = shufflevector <4 x float> %ld3, <4 x float> %ld4, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %sv4 = shufflevector <4 x float> %ld3, <4 x float> %ld4, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  %m0_3 = shufflevector <4 x float> %sv1, <4 x float> %sv3, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %m4_7 = shufflevector <4 x float> %sv1, <4 x float> %sv3, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %m8_11 = shufflevector <4 x float> %sv2, <4 x float> %sv4, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %m12_15 = shufflevector <4 x float> %sv2, <4 x float> %sv4, <4 x i32> <i32 1, i32 3, i32 5, i32 7>

  store <4 x float> %m0_3, ptr %gep1, align 16
  store <4 x float> %m4_7, ptr %gep2, align 16
  store <4 x float> %m8_11, ptr %gep3, align 16
  store <4 x float> %m12_15, ptr %gep4, align 16
  ret void
}

; This should be lowered into LD4; an offset has to be taken into account
; The vectors live in field 1 of a packed struct, behind a leading float.
%struct.ilc = type <{ float, [0 x <4 x float>] }>
define void @aarch64_ilc_struct(ptr %ptr, i64 %idx) {
entry:

;;; Check LLVM transformation
; CHECK-LABEL: @aarch64_ilc_struct(
; CHECK-DAG: [[LSHR:%.+]] = lshr i64 %idx, 2
; CHECK-DAG: [[GEP:%.+]] = getelementptr %struct.ilc, ptr %ptr, i32 0, i32 1, i64 [[LSHR]]
; CHECK-DAG: [[LOAD:%.+]] = load <16 x float>, ptr [[GEP]], align 4
; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
; CHECK-DAG: %{{.* }}= shufflevector <16 x float> [[LOAD]], <16 x float> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
; CHECK: ret void

; AS-LABEL: aarch64_ilc_struct
; AS-DAG: lsl [[LSL:x[0-9]+]], x1, #2
; AS-DAG: add [[ADD:x[0-9]+]], x0, #4
; AS-DAG: and [[AND:x[0-9]+]], [[LSL]], #0xfffffffffffffff0
; AS-DAG: add [[ADR:x[0-9]+]], [[ADD]], [[AND]]
; AS-DAG: ld4 { v[[V0:[0-9]+]].4s, v[[V1:[0-9]+]].4s, v[[V2:[0-9]+]].4s, v[[V3:[0-9]+]].4s }, [[[ADR]]]
; AS-DAG: str q[[V0]]
; AS-DAG: str q[[V1]]
; AS-DAG: str q[[V2]]
; AS-DAG: str q[[V3]]
; AS: ret

  %a1 = add i64 %idx, 4
  %idx2 = lshr i64 %a1, 2
  %a2 = add i64 %idx, 8
  %idx3 = lshr i64 %a2, 2
  %a3 = add i64 %idx, 12
  %idx4 = lshr i64 %a3, 2

  ; The lowest address (%gep1) is deliberately or incidentally computed last.
  %gep2 = getelementptr %struct.ilc, ptr %ptr, i32 0, i32 1, i64 %idx2
  %gep3 = getelementptr %struct.ilc, ptr %ptr, i32 0, i32 1, i64 %idx3
  %gep4 = getelementptr %struct.ilc, ptr %ptr, i32 0, i32 1, i64 %idx4
  %idx1 = lshr i64 %idx, 2
  %gep1 = getelementptr %struct.ilc, ptr %ptr, i32 0, i32 1, i64 %idx1
  %ld1 = load <4 x float>, ptr %gep1, align 4
  %ld2 = load <4 x float>, ptr %gep2, align 4
  %ld3 = load <4 x float>, ptr %gep3, align 4
  %ld4 = load <4 x float>, ptr %gep4, align 4
  %sv1 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %sv2 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  %sv3 = shufflevector <4 x float> %ld3, <4 x float> %ld4, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %sv4 = shufflevector <4 x float> %ld3, <4 x float> %ld4, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  %m0_3 = shufflevector <4 x float> %sv1, <4 x float> %sv3, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %m4_7 = shufflevector <4 x float> %sv1, <4 x float> %sv3, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %m8_11 = shufflevector <4 x float> %sv2, <4 x float> %sv4, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %m12_15 = shufflevector <4 x float> %sv2, <4 x float> %sv4, <4 x i32> <i32 1, i32 3, i32 5, i32 7>

  ; NOTE(review): the loads above use align 4 while these stores to the same
  ; pointers use align 16 - confirm the mismatch is intended.
  store <4 x float> %m0_3, ptr %gep1, align 16
  store <4 x float> %m4_7, ptr %gep2, align 16
  store <4 x float> %m8_11, ptr %gep3, align 16
  store <4 x float> %m12_15, ptr %gep4, align 16
  ret void
}

; This should be lowered into LD2
; Two adjacent loads, de-interleaved with stride 2.
define void @aarch64_ilc_idx_ld2(ptr %ptr, i64 %idx) {
entry:
; CHECK-LABEL: @aarch64_ilc_idx_ld2(
; CHECK-DAG: [[LSHR:%.+]] = lshr i64 %idx, 2
; CHECK-DAG: [[GEP:%.+]] = getelementptr inbounds <4 x float>, ptr %ptr, i64 [[LSHR]]
; CHECK-DAG: [[LOAD:%.+]] = load <8 x float>, ptr [[GEP]], align 16
; CHECK: %{{.* }}= shufflevector <8 x float> [[LOAD]], <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: %{{.* }}= shufflevector <8 x float> [[LOAD]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: ret void

; AS-LABEL: aarch64_ilc_idx_ld2
; AS: ld2
; AS: ret

  %idx1 = lshr i64 %idx, 2
  %a1 = add i64 %idx, 4
  %idx2 = lshr i64 %a1, 2

  %gep1 = getelementptr inbounds <4 x float>, ptr %ptr, i64 %idx1
  %gep2 = getelementptr inbounds <4 x float>, ptr %ptr, i64 %idx2
  %ld1 = load <4 x float>, ptr %gep1, align 16
  %ld2 = load <4 x float>, ptr %gep2, align 16
  %m0_3 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %m4_7 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>

  ; Alignment spelled out to match the other functions in this file.
  store <4 x float> %m0_3, ptr %gep1, align 16
  store <4 x float> %m4_7, ptr %gep2, align 16
  ret void
}

; This should be lowered into LD3
; Three adjacent loads, de-interleaved with stride 3.
define void @aarch64_ilc_idx_ld3(ptr %ptr, i64 %idx) {
entry:
; CHECK-LABEL: @aarch64_ilc_idx_ld3(
; CHECK-DAG: [[LSHR:%.+]] = lshr i64 %idx, 2
; CHECK-DAG: [[GEP:%.+]] = getelementptr inbounds <4 x float>, ptr %ptr, i64 [[LSHR]]
; CHECK-DAG: [[LOAD:%.+]] = load <12 x float>, ptr [[GEP]], align 16
; CHECK: %{{.* }}= shufflevector <12 x float> [[LOAD]], <12 x float> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK: %{{.* }}= shufflevector <12 x float> [[LOAD]], <12 x float> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
; CHECK: %{{.* }}= shufflevector <12 x float> [[LOAD]], <12 x float> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
; CHECK: ret void

; AS-LABEL: aarch64_ilc_idx_ld3
; AS: ld3
; AS: ret

  %idx1 = lshr i64 %idx, 2
  %a1 = add i64 %idx, 4
  %idx2 = lshr i64 %a1, 2
  %a2 = add i64 %idx, 8
  %idx3 = lshr i64 %a2, 2

  %gep1 = getelementptr inbounds <4 x float>, ptr %ptr, i64 %idx1
  %gep2 = getelementptr inbounds <4 x float>, ptr %ptr, i64 %idx2
  %gep3 = getelementptr inbounds <4 x float>, ptr %ptr, i64 %idx3
  %ld1 = load <4 x float>, ptr %gep1, align 16
  %ld2 = load <4 x float>, ptr %gep2, align 16
  %ld3 = load <4 x float>, ptr %gep3, align 16

  ; First stage takes the stride-3 lanes available in %ld1/%ld2 and leaves
  ; the remaining lanes undef; the second stage fills them in from %ld3.
  %sv1 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 0, i32 3, i32 6, i32 undef>
  %sv2 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 1, i32 4, i32 7, i32 undef>
  %sv3 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 2, i32 5, i32 undef, i32 undef>
  %m0_3 = shufflevector <4 x float> %sv1, <4 x float> %ld3, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
  %m4_7 = shufflevector <4 x float> %sv2, <4 x float> %ld3, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
  %m8_11 = shufflevector <4 x float> %sv3, <4 x float> %ld3, <4 x i32> <i32 0, i32 1, i32 4, i32 7>

  store <4 x float> %m0_3, ptr %gep1, align 16
  store <4 x float> %m4_7, ptr %gep2, align 16
  store <4 x float> %m8_11, ptr %gep3, align 16
  ret void
}

; This must not be lowered
; Same shape as @aarch64_ilc_idx_ld2, but the index is i32 instead of i64.
define void @aarch64_ilc_i32_idx(ptr %ptr, i32 %idx) {
; CHECK-LABEL: @aarch64_ilc_i32_idx(
; CHECK: %idx1 = lshr i32 %idx, 2
; CHECK-NEXT: %a1 = add i32 %idx, 4
; CHECK-NEXT: %idx2 = lshr i32 %a1, 2
; CHECK-NEXT: %gep1 = getelementptr inbounds <4 x float>, ptr %ptr, i32 %idx1
; CHECK-NEXT: %gep2 = getelementptr inbounds <4 x float>, ptr %ptr, i32 %idx2
; CHECK-NEXT: %ld1 = load <4 x float>, ptr %gep1, align 16
; CHECK-NEXT: %ld2 = load <4 x float>, ptr %gep2, align 16
; CHECK-NEXT: %m0_3 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT: %m4_7 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK-NEXT: store <4 x float> %m0_3, ptr %gep1, align 16
; CHECK-NEXT: store <4 x float> %m4_7, ptr %gep2, align 16
; CHECK-NEXT: ret void

; AS-LABEL: aarch64_ilc_i32_idx
; AS-DAG: @function
; AS-NOT: ld2
; AS-NOT: ld3
; AS-NOT: ld4
; AS-DAG: ret

entry:
  %idx1 = lshr i32 %idx, 2
  %a1 = add i32 %idx, 4
  %idx2 = lshr i32 %a1, 2

  %gep1 = getelementptr inbounds <4 x float>, ptr %ptr, i32 %idx1
  %gep2 = getelementptr inbounds <4 x float>, ptr %ptr, i32 %idx2
  %ld1 = load <4 x float>, ptr %gep1, align 16
  %ld2 = load <4 x float>, ptr %gep2, align 16
  %m0_3 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %m4_7 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>

  store <4 x float> %m0_3, ptr %gep1, align 16
  store <4 x float> %m4_7, ptr %gep2, align 16
  ret void
}

; Volatile loads must not be lowered
; %ld1 is volatile; the expected IR is the input, unchanged.
define void @aarch64_ilc_volatile(ptr %ptr) {
; CHECK-LABEL: @aarch64_ilc_volatile(
; CHECK: %gep2 = getelementptr inbounds <4 x float>, ptr %ptr, i32 1
; CHECK-NEXT: %ld1 = load volatile <4 x float>, ptr %ptr, align 16
; CHECK-NEXT: %ld2 = load <4 x float>, ptr %gep2, align 16
; CHECK-NEXT: %m0_3 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT: %m4_7 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK-NEXT: store <4 x float> %m0_3, ptr %ptr, align 16
; CHECK-NEXT: store <4 x float> %m4_7, ptr %gep2, align 16
; CHECK-NEXT: ret void

; AS-LABEL: aarch64_ilc_volatile
; AS-DAG: @function
; AS-NOT: ld2
; AS-NOT: ld3
; AS-NOT: ld4
; AS-DAG: ret

entry:
  %gep2 = getelementptr inbounds <4 x float>, ptr %ptr, i32 1
  %ld1 = load volatile <4 x float>, ptr %ptr, align 16
  %ld2 = load <4 x float>, ptr %gep2, align 16
  %m0_3 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %m4_7 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  store <4 x float> %m0_3, ptr %ptr, align 16
  store <4 x float> %m4_7, ptr %gep2, align 16
  ret void
}

; This must not be lowered
; A store to %gep2 sits between the two loads, and the second load reads
; from that same address.
define void @aarch64_ilc_depmem(ptr %ptr, i32 %idx) {
entry:
; CHECK-LABEL: @aarch64_ilc_depmem(
; CHECK: %gep2 = getelementptr inbounds <4 x float>, ptr %ptr, i32 1
; CHECK-NEXT: %ld1 = load <4 x float>, ptr %ptr, align 16
; CHECK-NEXT: store <4 x float> %ld1, ptr %gep2, align 16
; CHECK-NEXT: %ld2 = load <4 x float>, ptr %gep2, align 16
; CHECK-NEXT: %m0_3 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT: %m4_7 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK-NEXT: store <4 x float> %m0_3, ptr %ptr, align 16
; CHECK-NEXT: store <4 x float> %m4_7, ptr %gep2, align 16
; CHECK-NEXT: ret void

; AS-LABEL: aarch64_ilc_depmem
; AS-DAG: @function
; AS-NOT: ld2
; AS-NOT: ld3
; AS-NOT: ld4
; AS-DAG: ret

  %gep2 = getelementptr inbounds <4 x float>, ptr %ptr, i32 1
  %ld1 = load <4 x float>, ptr %ptr, align 16
  store <4 x float> %ld1, ptr %gep2, align 16
  %ld2 = load <4 x float>, ptr %gep2, align 16
  %m0_3 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %m4_7 = shufflevector <4 x float> %ld1, <4 x float> %ld2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>

  store <4 x float> %m0_3, ptr %ptr, align 16
  store <4 x float> %m4_7, ptr %gep2, align 16
  ret void
}

; This cannot be converted - insertion position cannot be determined
; Two <5 x float> loads four floats apart; only the opt output is checked.
define void @aarch64_no_insertion_pos(ptr %ptr) {
entry:
; CHECK-LABEL: @aarch64_no_insertion_pos(
; CHECK: %p1 = getelementptr inbounds float, ptr %ptr, i32 4
; CHECK-NEXT: %l0 = load <5 x float>, ptr %ptr
; CHECK-NEXT: %l1 = load <5 x float>, ptr %p1
; CHECK-NEXT: %s0 = shufflevector <5 x float> %l0, <5 x float> %l1, <4 x i32> <i32 1, i32 3, i32 6, i32 8>
; CHECK-NEXT: %s1 = shufflevector <5 x float> %l0, <5 x float> %l1, <4 x i32> <i32 2, i32 4, i32 7, i32 9>
; CHECK-NEXT: ret void

  %p1 = getelementptr inbounds float, ptr %ptr, i32 4
  %l0 = load <5 x float>, ptr %ptr
  %l1 = load <5 x float>, ptr %p1
  %s0 = shufflevector <5 x float> %l0, <5 x float> %l1, <4 x i32> <i32 1, i32 3, i32 6, i32 8>
  %s1 = shufflevector <5 x float> %l0, <5 x float> %l1, <4 x i32> <i32 2, i32 4, i32 7, i32 9>
  ret void
}

; This cannot be converted - the insertion position does not dominate all
; uses
; The load from the lower address (%l0) comes after the shuffle of %l1.
define void @aarch64_insertpos_does_not_dominate(ptr %ptr) {
entry:
; CHECK-LABEL: @aarch64_insertpos_does_not_dominate(
; CHECK: %p1 = getelementptr inbounds float, ptr %ptr, i32 1
; CHECK-NEXT: %l1 = load <7 x float>, ptr %p1
; CHECK-NEXT: %s1 = shufflevector <7 x float> %l1, <7 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT: %l0 = load <7 x float>, ptr %ptr
; CHECK-NEXT: %s0 = shufflevector <7 x float> %l0, <7 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT: ret void
  %p1 = getelementptr inbounds float, ptr %ptr, i32 1
  %l1 = load <7 x float>, ptr %p1
  %s1 = shufflevector <7 x float> %l1, <7 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %l0 = load <7 x float>, ptr %ptr
  %s0 = shufflevector <7 x float> %l0, <7 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  ret void
}