1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt -passes=lower-matrix-intrinsics,instcombine -fuse-matrix-use-loops=false -fuse-matrix-tile-size=2 -matrix-allow-contract -force-fuse-matrix -verify-dom-info %s -S | FileCheck %s 3 4; REQUIRES: aarch64-registered-target 5 6target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" 7target triple = "aarch64-apple-ios" 8 9; Test tiling without generating explicit loops. 10 11define void @multiply(ptr %A, ptr %B, ptr %C) { 12; CHECK-LABEL: @multiply( 13; CHECK-NEXT: entry: 14; CHECK-NEXT: [[STORE_BEGIN:%.*]] = ptrtoint ptr [[C:%.*]] to i64 15; CHECK-NEXT: [[STORE_END:%.*]] = add nuw nsw i64 [[STORE_BEGIN]], 128 16; CHECK-NEXT: [[LOAD_BEGIN:%.*]] = ptrtoint ptr [[A:%.*]] to i64 17; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[STORE_END]], [[LOAD_BEGIN]] 18; CHECK-NEXT: br i1 [[TMP0]], label [[ALIAS_CONT:%.*]], label [[NO_ALIAS:%.*]] 19; CHECK: alias_cont: 20; CHECK-NEXT: [[LOAD_END:%.*]] = add nuw nsw i64 [[LOAD_BEGIN]], 128 21; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[LOAD_END]], [[STORE_BEGIN]] 22; CHECK-NEXT: br i1 [[TMP1]], label [[COPY:%.*]], label [[NO_ALIAS]] 23; CHECK: copy: 24; CHECK-NEXT: [[TMP2:%.*]] = alloca [16 x double], align 8 25; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(128) [[TMP2]], ptr noundef nonnull align 8 dereferenceable(128) [[A]], i64 128, i1 false) 26; CHECK-NEXT: br label [[NO_ALIAS]] 27; CHECK: no_alias: 28; CHECK-NEXT: [[TMP3:%.*]] = phi ptr [ [[A]], [[ENTRY:%.*]] ], [ [[A]], [[ALIAS_CONT]] ], [ [[TMP2]], [[COPY]] ] 29; CHECK-NEXT: [[STORE_BEGIN4:%.*]] = ptrtoint ptr [[C]] to i64 30; CHECK-NEXT: [[STORE_END5:%.*]] = add nuw nsw i64 [[STORE_BEGIN4]], 128 31; CHECK-NEXT: [[LOAD_BEGIN6:%.*]] = ptrtoint ptr [[B:%.*]] to i64 32; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[STORE_END5]], [[LOAD_BEGIN6]] 33; CHECK-NEXT: br i1 [[TMP4]], label [[ALIAS_CONT1:%.*]], label [[NO_ALIAS3:%.*]] 34; CHECK: alias_cont1: 35; CHECK-NEXT: [[LOAD_END7:%.*]] = add nuw nsw i64 [[LOAD_BEGIN6]], 128 36; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[LOAD_END7]], [[STORE_BEGIN4]] 37; CHECK-NEXT: br i1 [[TMP5]], label [[COPY2:%.*]], label [[NO_ALIAS3]] 38; CHECK: copy2: 39; CHECK-NEXT: [[TMP6:%.*]] = alloca [16 x double], align 8 40; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(128) [[TMP6]], ptr noundef nonnull align 8 dereferenceable(128) [[B]], i64 128, i1 false) 41; CHECK-NEXT: br label [[NO_ALIAS3]] 42; CHECK: no_alias3: 43; CHECK-NEXT: [[TMP7:%.*]] = phi ptr [ [[B]], [[NO_ALIAS]] ], [ [[B]], [[ALIAS_CONT1]] ], [ [[TMP6]], [[COPY2]] ] 44; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, ptr [[TMP3]], align 8 45; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr i8, ptr [[TMP3]], i64 32 46; CHECK-NEXT: [[COL_LOAD8:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 8 47; CHECK-NEXT: [[COL_LOAD9:%.*]] = load <2 x double>, ptr [[TMP7]], align 8 48; CHECK-NEXT: [[VEC_GEP10:%.*]] = getelementptr i8, ptr [[TMP7]], i64 32 49; CHECK-NEXT: [[COL_LOAD11:%.*]] = load <2 x double>, ptr [[VEC_GEP10]], align 8 50; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[COL_LOAD9]], <2 x double> poison, <2 x i32> zeroinitializer 51; CHECK-NEXT: [[TMP8:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT]] 52; CHECK-NEXT: [[SPLAT_SPLAT14:%.*]] = shufflevector <2 x double> [[COL_LOAD9]], <2 x double> poison, <2 x i32> <i32 1, i32 1> 53; CHECK-NEXT: [[TMP9:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD8]], <2 x double> [[SPLAT_SPLAT14]], <2 x double> [[TMP8]]) 54; CHECK-NEXT: [[SPLAT_SPLAT17:%.*]] = shufflevector <2 x double> [[COL_LOAD11]], <2 x double> poison, <2 x i32> zeroinitializer 55; CHECK-NEXT: [[TMP10:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT17]] 56; CHECK-NEXT: [[SPLAT_SPLAT20:%.*]] = shufflevector <2 x double> [[COL_LOAD11]], <2 x double> poison, <2 x i32> <i32 1, i32 1> 57; CHECK-NEXT: [[TMP11:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD8]], <2 x double> [[SPLAT_SPLAT20]], <2 x double> [[TMP10]]) 58; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP3]], i64 64 59; CHECK-NEXT: [[COL_LOAD21:%.*]] = load <2 x double>, ptr [[TMP12]], align 8 60; CHECK-NEXT: [[VEC_GEP22:%.*]] = getelementptr i8, ptr [[TMP3]], i64 96 61; CHECK-NEXT: [[COL_LOAD23:%.*]] = load <2 x double>, ptr [[VEC_GEP22]], align 8 62; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP7]], i64 16 63; CHECK-NEXT: [[COL_LOAD24:%.*]] = load <2 x double>, ptr [[TMP13]], align 8 64; CHECK-NEXT: [[VEC_GEP25:%.*]] = getelementptr i8, ptr [[TMP7]], i64 48 65; CHECK-NEXT: [[COL_LOAD26:%.*]] = load <2 x double>, ptr [[VEC_GEP25]], align 8 66; CHECK-NEXT: [[SPLAT_SPLAT30:%.*]] = shufflevector <2 x double> [[COL_LOAD24]], <2 x double> poison, <2 x i32> zeroinitializer 67; CHECK-NEXT: [[TMP14:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD21]], <2 x double> [[SPLAT_SPLAT30]], <2 x double> [[TMP9]]) 68; CHECK-NEXT: [[SPLAT_SPLAT33:%.*]] = shufflevector <2 x double> [[COL_LOAD24]], <2 x double> poison, <2 x i32> <i32 1, i32 1> 69; CHECK-NEXT: [[TMP15:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD23]], <2 x double> [[SPLAT_SPLAT33]], <2 x double> [[TMP14]]) 70; CHECK-NEXT: [[SPLAT_SPLAT37:%.*]] = shufflevector <2 x double> [[COL_LOAD26]], <2 x double> poison, <2 x i32> zeroinitializer 71; CHECK-NEXT: [[TMP16:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD21]], <2 x double> [[SPLAT_SPLAT37]], <2 x double> [[TMP11]]) 72; CHECK-NEXT: [[SPLAT_SPLAT40:%.*]] = shufflevector <2 x double> [[COL_LOAD26]], <2 x double> poison, <2 x i32> <i32 1, i32 1> 73; CHECK-NEXT: [[TMP17:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD23]], <2 x double> [[SPLAT_SPLAT40]], <2 x double> [[TMP16]]) 74; CHECK-NEXT: store <2 x double> [[TMP15]], ptr [[C]], align 8 75; CHECK-NEXT: [[VEC_GEP41:%.*]] = getelementptr i8, ptr [[C]], i64 32 76; CHECK-NEXT: store <2 x double> [[TMP17]], ptr [[VEC_GEP41]], align 8 77; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP3]], i64 16 78; CHECK-NEXT: [[COL_LOAD42:%.*]] = load <2 x double>, ptr [[TMP18]], align 8 79; CHECK-NEXT: [[VEC_GEP43:%.*]] = getelementptr i8, ptr [[TMP3]], i64 48 80; CHECK-NEXT: [[COL_LOAD44:%.*]] = load <2 x double>, ptr [[VEC_GEP43]], align 8 81; CHECK-NEXT: [[COL_LOAD45:%.*]] = load <2 x double>, ptr [[TMP7]], align 8 82; CHECK-NEXT: [[VEC_GEP46:%.*]] = getelementptr i8, ptr [[TMP7]], i64 32 83; CHECK-NEXT: [[COL_LOAD47:%.*]] = load <2 x double>, ptr [[VEC_GEP46]], align 8 84; CHECK-NEXT: [[SPLAT_SPLAT50:%.*]] = shufflevector <2 x double> [[COL_LOAD45]], <2 x double> poison, <2 x i32> zeroinitializer 85; CHECK-NEXT: [[TMP19:%.*]] = fmul contract <2 x double> [[COL_LOAD42]], [[SPLAT_SPLAT50]] 86; CHECK-NEXT: [[SPLAT_SPLAT53:%.*]] = shufflevector <2 x double> [[COL_LOAD45]], <2 x double> poison, <2 x i32> <i32 1, i32 1> 87; CHECK-NEXT: [[TMP20:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD44]], <2 x double> [[SPLAT_SPLAT53]], <2 x double> [[TMP19]]) 88; CHECK-NEXT: [[SPLAT_SPLAT56:%.*]] = shufflevector <2 x double> [[COL_LOAD47]], <2 x double> poison, <2 x i32> zeroinitializer 89; CHECK-NEXT: [[TMP21:%.*]] = fmul contract <2 x double> [[COL_LOAD42]], [[SPLAT_SPLAT56]] 90; CHECK-NEXT: [[SPLAT_SPLAT59:%.*]] = shufflevector <2 x double> [[COL_LOAD47]], <2 x double> poison, <2 x i32> <i32 1, i32 1> 91; CHECK-NEXT: [[TMP22:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD44]], <2 x double> [[SPLAT_SPLAT59]], <2 x double> [[TMP21]]) 92; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[TMP3]], i64 80 93; CHECK-NEXT: [[COL_LOAD60:%.*]] = load <2 x double>, ptr [[TMP23]], align 8 94; CHECK-NEXT: [[VEC_GEP61:%.*]] = getelementptr i8, ptr [[TMP3]], i64 112 95; CHECK-NEXT: [[COL_LOAD62:%.*]] = load <2 x double>, ptr [[VEC_GEP61]], align 8 96; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP7]], i64 16 97; CHECK-NEXT: [[COL_LOAD63:%.*]] = load <2 x double>, ptr [[TMP24]], align 8 98; CHECK-NEXT: [[VEC_GEP64:%.*]] = getelementptr i8, ptr [[TMP7]], i64 48 99; CHECK-NEXT: [[COL_LOAD65:%.*]] = load <2 x double>, ptr [[VEC_GEP64]], align 8 100; CHECK-NEXT: [[SPLAT_SPLAT69:%.*]] = shufflevector <2 x double> [[COL_LOAD63]], <2 x double> poison, <2 x i32> zeroinitializer 101; CHECK-NEXT: [[TMP25:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD60]], <2 x double> [[SPLAT_SPLAT69]], <2 x double> [[TMP20]]) 102; CHECK-NEXT: [[SPLAT_SPLAT72:%.*]] = shufflevector <2 x double> [[COL_LOAD63]], <2 x double> poison, <2 x i32> <i32 1, i32 1> 103; CHECK-NEXT: [[TMP26:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD62]], <2 x double> [[SPLAT_SPLAT72]], <2 x double> [[TMP25]]) 104; CHECK-NEXT: [[SPLAT_SPLAT76:%.*]] = shufflevector <2 x double> [[COL_LOAD65]], <2 x double> poison, <2 x i32> zeroinitializer 105; CHECK-NEXT: [[TMP27:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD60]], <2 x double> [[SPLAT_SPLAT76]], <2 x double> [[TMP22]]) 106; CHECK-NEXT: [[SPLAT_SPLAT79:%.*]] = shufflevector <2 x double> [[COL_LOAD65]], <2 x double> poison, <2 x i32> <i32 1, i32 1> 107; CHECK-NEXT: [[TMP28:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD62]], <2 x double> [[SPLAT_SPLAT79]], <2 x double> [[TMP27]]) 108; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[C]], i64 16 109; CHECK-NEXT: store <2 x double> [[TMP26]], ptr [[TMP29]], align 8 110; CHECK-NEXT: [[VEC_GEP80:%.*]] = getelementptr i8, ptr [[C]], i64 48 111; CHECK-NEXT: store <2 x double> [[TMP28]], ptr [[VEC_GEP80]], align 8 112; CHECK-NEXT: [[COL_LOAD81:%.*]] = load <2 x double>, ptr [[TMP3]], align 8 113; CHECK-NEXT: [[VEC_GEP82:%.*]] = getelementptr i8, ptr [[TMP3]], i64 32 114; CHECK-NEXT: [[COL_LOAD83:%.*]] = load <2 x double>, ptr [[VEC_GEP82]], align 8 115; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[TMP7]], i64 64 116; CHECK-NEXT: [[COL_LOAD84:%.*]] = load <2 x double>, ptr [[TMP30]], align 8 117; CHECK-NEXT: [[VEC_GEP85:%.*]] = getelementptr i8, ptr [[TMP7]], i64 96 118; CHECK-NEXT: [[COL_LOAD86:%.*]] = load <2 x double>, ptr [[VEC_GEP85]], align 8 119; CHECK-NEXT: [[SPLAT_SPLAT89:%.*]] = shufflevector <2 x double> [[COL_LOAD84]], <2 x double> poison, <2 x i32> zeroinitializer 120; CHECK-NEXT: [[TMP31:%.*]] = fmul contract <2 x double> [[COL_LOAD81]], [[SPLAT_SPLAT89]] 121; CHECK-NEXT: [[SPLAT_SPLAT92:%.*]] = shufflevector <2 x double> [[COL_LOAD84]], <2 x double> poison, <2 x i32> <i32 1, i32 1> 122; CHECK-NEXT: [[TMP32:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD83]], <2 x double> [[SPLAT_SPLAT92]], <2 x double> [[TMP31]]) 123; CHECK-NEXT: [[SPLAT_SPLAT95:%.*]] = shufflevector <2 x double> [[COL_LOAD86]], <2 x double> poison, <2 x i32> zeroinitializer 124; CHECK-NEXT: [[TMP33:%.*]] = fmul contract <2 x double> [[COL_LOAD81]], [[SPLAT_SPLAT95]] 125; CHECK-NEXT: [[SPLAT_SPLAT98:%.*]] = shufflevector <2 x double> [[COL_LOAD86]], <2 x double> poison, <2 x i32> <i32 1, i32 1> 126; CHECK-NEXT: [[TMP34:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD83]], <2 x double> [[SPLAT_SPLAT98]], <2 x double> [[TMP33]]) 127; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr [[TMP3]], i64 64 128; CHECK-NEXT: [[COL_LOAD99:%.*]] = load <2 x double>, ptr [[TMP35]], align 8 129; CHECK-NEXT: [[VEC_GEP100:%.*]] = getelementptr i8, ptr [[TMP3]], i64 96 130; CHECK-NEXT: [[COL_LOAD101:%.*]] = load <2 x double>, ptr [[VEC_GEP100]], align 8 131; CHECK-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[TMP7]], i64 80 132; CHECK-NEXT: [[COL_LOAD102:%.*]] = load <2 x double>, ptr [[TMP36]], align 8 133; CHECK-NEXT: [[VEC_GEP103:%.*]] = getelementptr i8, ptr [[TMP7]], i64 112 134; CHECK-NEXT: [[COL_LOAD104:%.*]] = load <2 x double>, ptr [[VEC_GEP103]], align 8 135; CHECK-NEXT: [[SPLAT_SPLAT108:%.*]] = shufflevector <2 x double> [[COL_LOAD102]], <2 x double> poison, <2 x i32> zeroinitializer 136; CHECK-NEXT: [[TMP37:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD99]], <2 x double> [[SPLAT_SPLAT108]], <2 x double> [[TMP32]]) 137; CHECK-NEXT: [[SPLAT_SPLAT111:%.*]] = shufflevector <2 x double> [[COL_LOAD102]], <2 x double> poison, <2 x i32> <i32 1, i32 1> 138; CHECK-NEXT: [[TMP38:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD101]], <2 x double> [[SPLAT_SPLAT111]], <2 x double> [[TMP37]]) 139; CHECK-NEXT: [[SPLAT_SPLAT115:%.*]] = shufflevector <2 x double> [[COL_LOAD104]], <2 x double> poison, <2 x i32> zeroinitializer 140; CHECK-NEXT: [[TMP39:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD99]], <2 x double> [[SPLAT_SPLAT115]], <2 x double> [[TMP34]]) 141; CHECK-NEXT: [[SPLAT_SPLAT118:%.*]] = shufflevector <2 x double> [[COL_LOAD104]], <2 x double> poison, <2 x i32> <i32 1, i32 1> 142; CHECK-NEXT: [[TMP40:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD101]], <2 x double> [[SPLAT_SPLAT118]], <2 x double> [[TMP39]]) 143; CHECK-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr [[C]], i64 64 144; CHECK-NEXT: store <2 x double> [[TMP38]], ptr [[TMP41]], align 8 145; CHECK-NEXT: [[VEC_GEP119:%.*]] = getelementptr i8, ptr [[C]], i64 96 146; CHECK-NEXT: store <2 x double> [[TMP40]], ptr [[VEC_GEP119]], align 8 147; CHECK-NEXT: [[TMP42:%.*]] = getelementptr i8, ptr [[TMP3]], i64 16 148; CHECK-NEXT: [[COL_LOAD120:%.*]] = load <2 x double>, ptr [[TMP42]], align 8 149; CHECK-NEXT: [[VEC_GEP121:%.*]] = getelementptr i8, ptr [[TMP3]], i64 48 150; CHECK-NEXT: [[COL_LOAD122:%.*]] = load <2 x double>, ptr [[VEC_GEP121]], align 8 151; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[TMP7]], i64 64 152; CHECK-NEXT: [[COL_LOAD123:%.*]] = load <2 x double>, ptr [[TMP43]], align 8 153; CHECK-NEXT: [[VEC_GEP124:%.*]] = getelementptr i8, ptr [[TMP7]], i64 96 154; CHECK-NEXT: [[COL_LOAD125:%.*]] = load <2 x double>, ptr [[VEC_GEP124]], align 8 155; CHECK-NEXT: [[SPLAT_SPLAT128:%.*]] = shufflevector <2 x double> [[COL_LOAD123]], <2 x double> poison, <2 x i32> zeroinitializer 156; CHECK-NEXT: [[TMP44:%.*]] = fmul contract <2 x double> [[COL_LOAD120]], [[SPLAT_SPLAT128]] 157; CHECK-NEXT: [[SPLAT_SPLAT131:%.*]] = shufflevector <2 x double> [[COL_LOAD123]], <2 x double> poison, <2 x i32> <i32 1, i32 1> 158; CHECK-NEXT: [[TMP45:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD122]], <2 x double> [[SPLAT_SPLAT131]], <2 x double> [[TMP44]]) 159; CHECK-NEXT: [[SPLAT_SPLAT134:%.*]] = shufflevector <2 x double> [[COL_LOAD125]], <2 x double> poison, <2 x i32> zeroinitializer 160; CHECK-NEXT: [[TMP46:%.*]] = fmul contract <2 x double> [[COL_LOAD120]], [[SPLAT_SPLAT134]] 161; CHECK-NEXT: [[SPLAT_SPLAT137:%.*]] = shufflevector <2 x double> [[COL_LOAD125]], <2 x double> poison, <2 x i32> <i32 1, i32 1> 162; CHECK-NEXT: [[TMP47:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD122]], <2 x double> [[SPLAT_SPLAT137]], <2 x double> [[TMP46]]) 163; CHECK-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[TMP3]], i64 80 164; CHECK-NEXT: [[COL_LOAD138:%.*]] = load <2 x double>, ptr [[TMP48]], align 8 165; CHECK-NEXT: [[VEC_GEP139:%.*]] = getelementptr i8, ptr [[TMP3]], i64 112 166; CHECK-NEXT: [[COL_LOAD140:%.*]] = load <2 x double>, ptr [[VEC_GEP139]], align 8 167; CHECK-NEXT: [[TMP49:%.*]] = getelementptr i8, ptr [[TMP7]], i64 80 168; CHECK-NEXT: [[COL_LOAD141:%.*]] = load <2 x double>, ptr [[TMP49]], align 8 169; CHECK-NEXT: [[VEC_GEP142:%.*]] = getelementptr i8, ptr [[TMP7]], i64 112 170; CHECK-NEXT: [[COL_LOAD143:%.*]] = load <2 x double>, ptr [[VEC_GEP142]], align 8 171; CHECK-NEXT: [[SPLAT_SPLAT147:%.*]] = shufflevector <2 x double> [[COL_LOAD141]], <2 x double> poison, <2 x i32> zeroinitializer 172; CHECK-NEXT: [[TMP50:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD138]], <2 x double> [[SPLAT_SPLAT147]], <2 x double> [[TMP45]]) 173; CHECK-NEXT: [[SPLAT_SPLAT150:%.*]] = shufflevector <2 x double> [[COL_LOAD141]], <2 x double> poison, <2 x i32> <i32 1, i32 1> 174; CHECK-NEXT: [[TMP51:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD140]], <2 x double> [[SPLAT_SPLAT150]], <2 x double> [[TMP50]]) 175; CHECK-NEXT: [[SPLAT_SPLAT154:%.*]] = shufflevector <2 x double> [[COL_LOAD143]], <2 x double> poison, <2 x i32> zeroinitializer 176; CHECK-NEXT: [[TMP52:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD138]], <2 x double> [[SPLAT_SPLAT154]], <2 x double> [[TMP47]]) 177; CHECK-NEXT: [[SPLAT_SPLAT157:%.*]] = shufflevector <2 x double> [[COL_LOAD143]], <2 x double> poison, <2 x i32> <i32 1, i32 1> 178; CHECK-NEXT: [[TMP53:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD140]], <2 x double> [[SPLAT_SPLAT157]], <2 x double> [[TMP52]]) 179; CHECK-NEXT: [[TMP54:%.*]] = getelementptr i8, ptr [[C]], i64 80 180; CHECK-NEXT: store <2 x double> [[TMP51]], ptr [[TMP54]], align 8 181; CHECK-NEXT: [[VEC_GEP158:%.*]] = getelementptr i8, ptr [[C]], i64 112 182; CHECK-NEXT: store <2 x double> [[TMP53]], ptr [[VEC_GEP158]], align 8 183; CHECK-NEXT: ret void 184; 185 186 187;; np.dot(a[0:2, 0:2], b[0:2, 0:2]) 188 189 190;; + np.dot(a[0:2, 2:4], b[2:4, 0:2]) 191 192 193;; -> c[0:2, 0:2] 194 195 196;; np.dot(a[2:4, 0:2], b[0:2, 0:2]) 197 198 199;; + np.dot(a[2:4, 2:4], b[2:4, 0:2]) 200 201 202;; -> c[2:4, 0:2] 203 204 205;; np.dot(a[0:2, 0:2], b[0:2, 2:4]) 206 207 208;; + np.dot(a[0:2, 2:4], b[2:4, 2:4]) 209 210 211;; -> c[0:2, 2:4] 212 213 214;; np.dot(a[2:4, 0:2], b[2:4, 0:2]) 215 216 217;; + np.dot(a[2:4, 2:4], b[2:4, 2:4]) 218 219 220;; -> c[2:4, 2:4] 221 222entry: 223 %a = load <16 x double>, ptr %A, align 8 224 %b = load <16 x double>, ptr %B, align 8 225 226 %c = call <16 x double> @llvm.matrix.multiply(<16 x double> %a, <16 x double> %b, i32 4, i32 4, i32 4) 227 228 store <16 x double> %c, ptr %C, align 8 229 ret void 230} 231 232; The same load is used for both operands of the multiply. 233define void @multiply_reuse_load(ptr noalias %A, ptr noalias %B, ptr noalias %C) { 234; CHECK-LABEL: @multiply_reuse_load( 235; CHECK-NEXT: entry: 236; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8 237; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 32 238; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 8 239; CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <2 x i32> zeroinitializer 240; CHECK-NEXT: [[TMP0:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT]] 241; CHECK-NEXT: [[SPLAT_SPLAT7:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <2 x i32> <i32 1, i32 1> 242; CHECK-NEXT: [[TMP1:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD1]], <2 x double> [[SPLAT_SPLAT7]], <2 x double> [[TMP0]]) 243; CHECK-NEXT: [[SPLAT_SPLAT10:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <2 x i32> zeroinitializer 244; CHECK-NEXT: [[TMP2:%.*]] = fmul contract <2 x double> [[COL_LOAD]], [[SPLAT_SPLAT10]] 245; CHECK-NEXT: [[SPLAT_SPLAT13:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <2 x i32> <i32 1, i32 1> 246; CHECK-NEXT: [[TMP3:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD1]], <2 x double> [[SPLAT_SPLAT13]], <2 x double> [[TMP2]]) 247; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 64 248; CHECK-NEXT: [[COL_LOAD14:%.*]] = load <2 x double>, ptr [[TMP4]], align 8 249; CHECK-NEXT: [[VEC_GEP15:%.*]] = getelementptr i8, ptr [[A]], i64 96 250; CHECK-NEXT: [[COL_LOAD16:%.*]] = load <2 x double>, ptr [[VEC_GEP15]], align 8 251; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[A]], i64 16 252; CHECK-NEXT: [[COL_LOAD17:%.*]] = load <2 x double>, ptr [[TMP5]], align 8 253; CHECK-NEXT: [[VEC_GEP18:%.*]] = getelementptr i8, ptr [[A]], i64 48 254; CHECK-NEXT: [[COL_LOAD19:%.*]] = load <2 x double>, ptr [[VEC_GEP18]], align 8 255; CHECK-NEXT: [[SPLAT_SPLAT23:%.*]] = shufflevector <2 x double> [[COL_LOAD17]], <2 x double> poison, <2 x i32> zeroinitializer 256; CHECK-NEXT: [[TMP6:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD14]], <2 x double> [[SPLAT_SPLAT23]], <2 x double> [[TMP1]]) 257; CHECK-NEXT: [[SPLAT_SPLAT26:%.*]] = shufflevector <2 x double> [[COL_LOAD17]], <2 x double> poison, <2 x i32> <i32 1, i32 1> 258; CHECK-NEXT: [[TMP7:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD16]], <2 x double> [[SPLAT_SPLAT26]], <2 x double> [[TMP6]]) 259; CHECK-NEXT: [[SPLAT_SPLAT30:%.*]] = shufflevector <2 x double> [[COL_LOAD19]], <2 x double> poison, <2 x i32> zeroinitializer 260; CHECK-NEXT: [[TMP8:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD14]], <2 x double> [[SPLAT_SPLAT30]], <2 x double> [[TMP3]]) 261; CHECK-NEXT: [[SPLAT_SPLAT33:%.*]] = shufflevector <2 x double> [[COL_LOAD19]], <2 x double> poison, <2 x i32> <i32 1, i32 1> 262; CHECK-NEXT: [[TMP9:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD16]], <2 x double> [[SPLAT_SPLAT33]], <2 x double> [[TMP8]]) 263; CHECK-NEXT: store <2 x double> [[TMP7]], ptr [[C:%.*]], align 8 264; CHECK-NEXT: [[VEC_GEP34:%.*]] = getelementptr i8, ptr [[C]], i64 32 265; CHECK-NEXT: store <2 x double> [[TMP9]], ptr [[VEC_GEP34]], align 8 266; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 16 267; CHECK-NEXT: [[COL_LOAD35:%.*]] = load <2 x double>, ptr [[TMP10]], align 8 268; CHECK-NEXT: [[VEC_GEP36:%.*]] = getelementptr i8, ptr [[A]], i64 48 269; CHECK-NEXT: [[COL_LOAD37:%.*]] = load <2 x double>, ptr [[VEC_GEP36]], align 8 270; CHECK-NEXT: [[COL_LOAD38:%.*]] = load <2 x double>, ptr [[A]], align 8 271; CHECK-NEXT: [[VEC_GEP39:%.*]] = getelementptr i8, ptr [[A]], i64 32 272; CHECK-NEXT: [[COL_LOAD40:%.*]] = load <2 x double>, ptr [[VEC_GEP39]], align 8 273; CHECK-NEXT: [[SPLAT_SPLAT43:%.*]] = shufflevector <2 x double> [[COL_LOAD38]], <2 x double> poison, <2 x i32> zeroinitializer 274; CHECK-NEXT: [[TMP11:%.*]] = fmul contract <2 x double> [[COL_LOAD35]], [[SPLAT_SPLAT43]] 275; CHECK-NEXT: [[SPLAT_SPLAT46:%.*]] = shufflevector <2 x double> [[COL_LOAD38]], <2 x double> poison, <2 x i32> <i32 1, i32 1> 276; CHECK-NEXT: [[TMP12:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD37]], <2 x double> [[SPLAT_SPLAT46]], <2 x double> [[TMP11]]) 277; CHECK-NEXT: [[SPLAT_SPLAT49:%.*]] = shufflevector <2 x double> [[COL_LOAD40]], <2 x double> poison, <2 x i32> zeroinitializer 278; CHECK-NEXT: [[TMP13:%.*]] = fmul contract <2 x double> [[COL_LOAD35]], [[SPLAT_SPLAT49]] 279; CHECK-NEXT: [[SPLAT_SPLAT52:%.*]] = shufflevector <2 x double> [[COL_LOAD40]], <2 x double> poison, <2 x i32> <i32 1, i32 1> 280; CHECK-NEXT: [[TMP14:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD37]], <2 x double> [[SPLAT_SPLAT52]], <2 x double> [[TMP13]]) 281; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[A]], i64 80 282; CHECK-NEXT: [[COL_LOAD53:%.*]] = load <2 x double>, ptr [[TMP15]], align 8 283; CHECK-NEXT: [[VEC_GEP54:%.*]] = getelementptr i8, ptr [[A]], i64 112 284; CHECK-NEXT: [[COL_LOAD55:%.*]] = load <2 x double>, ptr [[VEC_GEP54]], align 8 285; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[A]], i64 16 286; CHECK-NEXT: [[COL_LOAD56:%.*]] = load <2 x double>, ptr [[TMP16]], align 8 287; CHECK-NEXT: [[VEC_GEP57:%.*]] = getelementptr i8, ptr [[A]], i64 48 288; CHECK-NEXT: [[COL_LOAD58:%.*]] = load <2 x double>, ptr [[VEC_GEP57]], align 8 289; CHECK-NEXT: [[SPLAT_SPLAT62:%.*]] = shufflevector <2 x double> [[COL_LOAD56]], <2 x double> poison, <2 x i32> zeroinitializer 290; CHECK-NEXT: [[TMP17:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD53]], <2 x double> [[SPLAT_SPLAT62]], <2 x double> [[TMP12]]) 291; CHECK-NEXT: [[SPLAT_SPLAT65:%.*]] = shufflevector <2 x double> [[COL_LOAD56]], <2 x double> poison, <2 x i32> <i32 1, i32 1> 292; CHECK-NEXT: [[TMP18:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD55]], <2 x double> [[SPLAT_SPLAT65]], <2 x double> [[TMP17]]) 293; CHECK-NEXT: [[SPLAT_SPLAT69:%.*]] = shufflevector <2 x double> [[COL_LOAD58]], <2 x double> poison, <2 x i32> zeroinitializer 294; CHECK-NEXT: [[TMP19:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD53]], <2 x double> [[SPLAT_SPLAT69]], <2 x double> [[TMP14]]) 295; CHECK-NEXT: [[SPLAT_SPLAT72:%.*]] = shufflevector <2 x double> [[COL_LOAD58]], <2 x double> poison, <2 x i32> <i32 1, i32 1> 296; CHECK-NEXT: [[TMP20:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD55]], <2 x double> [[SPLAT_SPLAT72]], <2 x double> [[TMP19]]) 297; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[C]], i64 16 298; CHECK-NEXT: store <2 x double> [[TMP18]], ptr [[TMP21]], align 8 299; CHECK-NEXT: [[VEC_GEP73:%.*]] = getelementptr i8, ptr [[C]], i64 48 300; CHECK-NEXT: store <2 x double> [[TMP20]], ptr [[VEC_GEP73]], align 8 301; CHECK-NEXT: [[COL_LOAD74:%.*]] = load <2 x double>, ptr [[A]], align 8 302; CHECK-NEXT: [[VEC_GEP75:%.*]] = getelementptr i8, ptr [[A]], i64 32 303; CHECK-NEXT: [[COL_LOAD76:%.*]] = load <2 x double>, ptr [[VEC_GEP75]], align 8 304; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[A]], i64 64 305; CHECK-NEXT: [[COL_LOAD77:%.*]] = load <2 x double>, ptr [[TMP22]], align 8 306; CHECK-NEXT: [[VEC_GEP78:%.*]] = getelementptr i8, ptr [[A]], i64 96 307; CHECK-NEXT: [[COL_LOAD79:%.*]] = load <2 x double>, ptr [[VEC_GEP78]], align 8 308; CHECK-NEXT: [[SPLAT_SPLAT82:%.*]] = shufflevector <2 x double> [[COL_LOAD77]], <2 x double> poison, <2 x i32> zeroinitializer 309; CHECK-NEXT: [[TMP23:%.*]] = fmul contract <2 x double> [[COL_LOAD74]], [[SPLAT_SPLAT82]] 310; CHECK-NEXT: [[SPLAT_SPLAT85:%.*]] = shufflevector <2 x double> [[COL_LOAD77]], <2 x double> poison, <2 x i32> <i32 1, i32 1> 311; CHECK-NEXT: [[TMP24:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD76]], <2 x double> [[SPLAT_SPLAT85]], <2 x double> [[TMP23]]) 312; CHECK-NEXT: [[SPLAT_SPLAT88:%.*]] = shufflevector <2 x double> [[COL_LOAD79]], <2 x double> poison, <2 x i32> zeroinitializer 313; CHECK-NEXT: [[TMP25:%.*]] = fmul contract <2 x double> [[COL_LOAD74]], [[SPLAT_SPLAT88]] 314; CHECK-NEXT: [[SPLAT_SPLAT91:%.*]] = shufflevector <2 x double> [[COL_LOAD79]], <2 x double> poison, <2 x i32> <i32 1, i32 1> 315; CHECK-NEXT: [[TMP26:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD76]], <2 x double> [[SPLAT_SPLAT91]], <2 x double> [[TMP25]]) 316; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[A]], i64 64 317; CHECK-NEXT: [[COL_LOAD92:%.*]] = load <2 x double>, ptr [[TMP27]], align 8 318; CHECK-NEXT: [[VEC_GEP93:%.*]] = getelementptr i8, ptr [[A]], i64 96 319; CHECK-NEXT: [[COL_LOAD94:%.*]] = load <2 x double>, ptr [[VEC_GEP93]], align 8 320; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[A]], i64 80 321; CHECK-NEXT: [[COL_LOAD95:%.*]] = load <2 x double>, ptr [[TMP28]], align 8 322; CHECK-NEXT: [[VEC_GEP96:%.*]] = getelementptr i8, ptr [[A]], i64 112 323; CHECK-NEXT: [[COL_LOAD97:%.*]] = load <2 x double>, ptr [[VEC_GEP96]], align 8 324; CHECK-NEXT: [[SPLAT_SPLAT101:%.*]] = shufflevector <2 x double> [[COL_LOAD95]], <2 x double> poison, <2 x i32> zeroinitializer 325; CHECK-NEXT: [[TMP29:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD92]], <2 x double> [[SPLAT_SPLAT101]], <2 x double> [[TMP24]]) 326; CHECK-NEXT: [[SPLAT_SPLAT104:%.*]] = shufflevector <2 x double> [[COL_LOAD95]], <2 x double> poison, <2 x i32> <i32 1, i32 1> 327; CHECK-NEXT: [[TMP30:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD94]], <2 x double> [[SPLAT_SPLAT104]], <2 x double> [[TMP29]]) 328; CHECK-NEXT: [[SPLAT_SPLAT108:%.*]] = shufflevector <2 x double> [[COL_LOAD97]], <2 x double> poison, <2 x i32> zeroinitializer 329; CHECK-NEXT: [[TMP31:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD92]], <2 x double> [[SPLAT_SPLAT108]], <2 x double> [[TMP26]]) 330; CHECK-NEXT: [[SPLAT_SPLAT111:%.*]] = shufflevector <2 x double> [[COL_LOAD97]], <2 x double> poison, <2 x i32> <i32 1, i32 1> 331; CHECK-NEXT: [[TMP32:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD94]], <2 x double> [[SPLAT_SPLAT111]], <2 x double> [[TMP31]]) 332; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[C]], i64 64 333; CHECK-NEXT: store <2 x double> [[TMP30]], ptr [[TMP33]], align 8 334; CHECK-NEXT: [[VEC_GEP112:%.*]] = getelementptr i8, ptr [[C]], i64 96 335; CHECK-NEXT: store <2 x double> [[TMP32]], ptr [[VEC_GEP112]], align 8 336; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[A]], i64 16 337; CHECK-NEXT: [[COL_LOAD113:%.*]] = load <2 x double>, ptr [[TMP34]], align 8 338; CHECK-NEXT: [[VEC_GEP114:%.*]] = getelementptr i8, ptr [[A]], i64 48 339; CHECK-NEXT: [[COL_LOAD115:%.*]] = load <2 x double>, ptr [[VEC_GEP114]], align 8 340; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr [[A]], i64 64 341; CHECK-NEXT: [[COL_LOAD116:%.*]] = load <2 x double>, ptr [[TMP35]], align 8 342; CHECK-NEXT: [[VEC_GEP117:%.*]] = getelementptr i8, ptr [[A]], i64 96 343; CHECK-NEXT: [[COL_LOAD118:%.*]] = load <2 x double>, ptr [[VEC_GEP117]], align 8 344; CHECK-NEXT: [[SPLAT_SPLAT121:%.*]] = shufflevector <2 x double> [[COL_LOAD116]], <2 x double> poison, <2 x i32> zeroinitializer 345; CHECK-NEXT: [[TMP36:%.*]] = fmul contract <2 x double> [[COL_LOAD113]], [[SPLAT_SPLAT121]] 346; CHECK-NEXT: [[SPLAT_SPLAT124:%.*]] = shufflevector <2 x double> [[COL_LOAD116]], <2 x double> poison, <2 x i32> <i32 1, i32 1> 347; CHECK-NEXT: [[TMP37:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD115]], <2 x double> [[SPLAT_SPLAT124]], <2 x double> [[TMP36]]) 348; CHECK-NEXT: [[SPLAT_SPLAT127:%.*]] = shufflevector <2 x double> [[COL_LOAD118]], <2 x double> poison, <2 x i32> zeroinitializer 349; CHECK-NEXT: [[TMP38:%.*]] = fmul contract <2 x double> [[COL_LOAD113]], [[SPLAT_SPLAT127]] 350; CHECK-NEXT: [[SPLAT_SPLAT130:%.*]] = shufflevector <2 x double> [[COL_LOAD118]], <2 x double> poison, <2 x i32> <i32 1, i32 1> 351; CHECK-NEXT: [[TMP39:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD115]], <2 x double> [[SPLAT_SPLAT130]], <2 x double> [[TMP38]]) 352; CHECK-NEXT: [[TMP40:%.*]] = getelementptr i8, ptr [[A]], i64 80 353; CHECK-NEXT: [[COL_LOAD131:%.*]] = load <2 x double>, ptr [[TMP40]], align 8 354; CHECK-NEXT: [[VEC_GEP132:%.*]] = getelementptr i8, ptr [[A]], i64 112 355; CHECK-NEXT: [[COL_LOAD133:%.*]] = load <2 x double>, ptr [[VEC_GEP132]], align 8 356; CHECK-NEXT: [[SPLAT_SPLAT140:%.*]] = shufflevector <2 x double> [[COL_LOAD131]], <2 x double> poison, <2 x i32> zeroinitializer 357; CHECK-NEXT: [[TMP41:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD131]], <2 x double> [[SPLAT_SPLAT140]], <2 x double> [[TMP37]]) 358; CHECK-NEXT: [[SPLAT_SPLAT143:%.*]] = shufflevector <2 x double> [[COL_LOAD131]], <2 x double> poison, <2 x i32> <i32 1, i32 1> 359; CHECK-NEXT: [[TMP42:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD133]], <2 x double> [[SPLAT_SPLAT143]], <2 x double> [[TMP41]]) 360; CHECK-NEXT: [[SPLAT_SPLAT147:%.*]] = shufflevector <2 x double> [[COL_LOAD133]], <2 x double> poison, <2 x i32> zeroinitializer 361; CHECK-NEXT: [[TMP43:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD131]], <2 x double> [[SPLAT_SPLAT147]], <2 x double> [[TMP39]]) 362; CHECK-NEXT: [[SPLAT_SPLAT150:%.*]] = shufflevector <2 x double> [[COL_LOAD133]], <2 x double> poison, <2 x i32> <i32 1, i32 1> 363; CHECK-NEXT: [[TMP44:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD133]], <2 x double> [[SPLAT_SPLAT150]], <2 x double> [[TMP43]]) 364; CHECK-NEXT: [[TMP45:%.*]] = getelementptr i8, ptr [[C]], i64 80 365; CHECK-NEXT: store <2 x double> [[TMP42]], ptr [[TMP45]], align 8 366; CHECK-NEXT: [[VEC_GEP151:%.*]] = getelementptr i8, ptr [[C]], i64 112 367; CHECK-NEXT: store <2 x double> [[TMP44]], ptr [[VEC_GEP151]], align 8 368; CHECK-NEXT: ret void 369; 370entry: 371 %a = load <16 x double>, ptr %A, align 8 372 %c = call <16 x double> @llvm.matrix.multiply(<16 x double> %a, <16 x double> %a, i32 4, i32 4, i32 4) 373 store <16 x double> %c, ptr %C, align 8 374 ret void 375} 376 377declare <16 x double> @llvm.matrix.multiply(<16 x double>, <16 x double>, i32, i32, i32) 378