; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=lower-matrix-intrinsics -fuse-matrix-use-loops -fuse-matrix-tile-size=2 -matrix-allow-contract -force-fuse-matrix -verify-dom-info %s -S | FileCheck %s

; REQUIRES: aarch64-registered-target

; Each function below loads two <4 x double> operands, multiplies them with
; llvm.matrix.multiply using dimensions (2, 2, 2), and stores the result. The
; functions differ only in which of the three memory accesses is volatile.
; With the options passed to opt above, the expected output for every function
; is a cols/rows/inner loop nest that loads <2 x double> tiles in inner.body
; and stores the result tiles in rows.latch.
;
; NOTE(review): in every case the expected tile loads are plain (non-volatile)
; loads, including the cases where the source load is volatile; only the store
; keeps its volatile marker. The assertions record this as the current output.
; Confirm whether dropping volatile on the loads is intended.

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "aarch64-apple-ios"

; Both operand loads and the result store are volatile in the input.
; Expected output: plain tile loads, volatile tile stores.
define void @multiply_all_volatile(ptr noalias %A, ptr noalias %B, ptr noalias %C) {
; CHECK-LABEL: @multiply_all_volatile(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[COLS_HEADER:%.*]]
; CHECK:       cols.header:
; CHECK-NEXT:    [[COLS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[COLS_STEP:%.*]], [[COLS_LATCH:%.*]] ]
; CHECK-NEXT:    br label [[COLS_BODY:%.*]]
; CHECK:       cols.body:
; CHECK-NEXT:    br label [[ROWS_HEADER:%.*]]
; CHECK:       rows.header:
; CHECK-NEXT:    [[ROWS_IV:%.*]] = phi i64 [ 0, [[COLS_BODY]] ], [ [[ROWS_STEP:%.*]], [[ROWS_LATCH:%.*]] ]
; CHECK-NEXT:    br label [[ROWS_BODY:%.*]]
; CHECK:       rows.body:
; CHECK-NEXT:    br label [[INNER_HEADER:%.*]]
; CHECK:       inner.header:
; CHECK-NEXT:    [[INNER_IV:%.*]] = phi i64 [ 0, [[ROWS_BODY]] ], [ [[INNER_STEP:%.*]], [[INNER_LATCH:%.*]] ]
; CHECK-NEXT:    [[RESULT_VEC_0:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP11:%.*]], [[INNER_LATCH]] ]
; CHECK-NEXT:    [[RESULT_VEC_1:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP17:%.*]], [[INNER_LATCH]] ]
; CHECK-NEXT:    br label [[INNER_BODY:%.*]]
; CHECK:       inner.body:
; CHECK-NEXT:    [[TMP0:%.*]] = mul i64 [[INNER_IV]], 2
; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], [[ROWS_IV]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[TMP1]]
; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x double>, ptr [[TMP2]], align 8
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[TMP2]], i64 2
; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[COLS_IV]], 2
; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[TMP3]], [[INNER_IV]]
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr double, ptr [[B:%.*]], i64 [[TMP4]]
; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x double>, ptr [[TMP5]], align 8
; CHECK-NEXT:    [[VEC_GEP3:%.*]] = getelementptr double, ptr [[TMP5]], i64 2
; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <2 x double>, ptr [[VEC_GEP3]], align 8
; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP6]], i64 0
; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK5]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[BLOCK]])
; CHECK-NEXT:    [[BLOCK6:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
; CHECK-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <2 x double> poison, double [[TMP8]], i64 0
; CHECK-NEXT:    [[SPLAT_SPLAT8:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT7]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP9:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK6]], <2 x double> [[SPLAT_SPLAT8]], <2 x double> [[TMP7]])
; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[TMP11]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> [[TMP10]], <2 x i32> <i32 2, i32 3>
; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <2 x double> poison, double [[TMP12]], i64 0
; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT11]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP13:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK10]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[BLOCK9]])
; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <2 x double> poison, double [[TMP14]], i64 0
; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT14]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP15:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK13]], <2 x double> [[SPLAT_SPLAT15]], <2 x double> [[TMP13]])
; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <2 x double> [[TMP15]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[TMP17]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> [[TMP16]], <2 x i32> <i32 2, i32 3>
; CHECK-NEXT:    br label [[INNER_LATCH]]
; CHECK:       inner.latch:
; CHECK-NEXT:    [[INNER_STEP]] = add i64 [[INNER_IV]], 2
; CHECK-NEXT:    [[INNER_COND:%.*]] = icmp ne i64 [[INNER_STEP]], 2
; CHECK-NEXT:    br i1 [[INNER_COND]], label [[INNER_HEADER]], label [[ROWS_LATCH]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       rows.latch:
; CHECK-NEXT:    [[ROWS_STEP]] = add i64 [[ROWS_IV]], 2
; CHECK-NEXT:    [[ROWS_COND:%.*]] = icmp ne i64 [[ROWS_STEP]], 2
; CHECK-NEXT:    [[TMP18:%.*]] = mul i64 [[COLS_IV]], 2
; CHECK-NEXT:    [[TMP19:%.*]] = add i64 [[TMP18]], [[ROWS_IV]]
; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr double, ptr [[C:%.*]], i64 [[TMP19]]
; CHECK-NEXT:    store volatile <2 x double> [[TMP11]], ptr [[TMP20]], align 8
; CHECK-NEXT:    [[VEC_GEP16:%.*]] = getelementptr double, ptr [[TMP20]], i64 2
; CHECK-NEXT:    store volatile <2 x double> [[TMP17]], ptr [[VEC_GEP16]], align 8
; CHECK-NEXT:    br i1 [[ROWS_COND]], label [[ROWS_HEADER]], label [[COLS_LATCH]]
; CHECK:       cols.latch:
; CHECK-NEXT:    [[COLS_STEP]] = add i64 [[COLS_IV]], 2
; CHECK-NEXT:    [[COLS_COND:%.*]] = icmp ne i64 [[COLS_STEP]], 2
; CHECK-NEXT:    br i1 [[COLS_COND]], label [[COLS_HEADER]], label [[CONTINUE:%.*]]
; CHECK:       continue:
; CHECK-NEXT:    ret void
;


entry:
  %a = load volatile <4 x double>, ptr %A, align 8
  %b = load volatile <4 x double>, ptr %B, align 8

  %c = call <4 x double> @llvm.matrix.multiply(<4 x double> %a, <4 x double> %b, i32 2, i32 2, i32 2)

  store volatile <4 x double> %c, ptr %C, align 8
  ret void
}


; Only the load of the first operand (%A) is volatile in the input.
; Expected output: plain tile loads and plain tile stores.
define void @multiply_load0_volatile(ptr noalias %A, ptr noalias %B, ptr noalias %C) {
; CHECK-LABEL: @multiply_load0_volatile(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[COLS_HEADER:%.*]]
; CHECK:       cols.header:
; CHECK-NEXT:    [[COLS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[COLS_STEP:%.*]], [[COLS_LATCH:%.*]] ]
; CHECK-NEXT:    br label [[COLS_BODY:%.*]]
; CHECK:       cols.body:
; CHECK-NEXT:    br label [[ROWS_HEADER:%.*]]
; CHECK:       rows.header:
; CHECK-NEXT:    [[ROWS_IV:%.*]] = phi i64 [ 0, [[COLS_BODY]] ], [ [[ROWS_STEP:%.*]], [[ROWS_LATCH:%.*]] ]
; CHECK-NEXT:    br label [[ROWS_BODY:%.*]]
; CHECK:       rows.body:
; CHECK-NEXT:    br label [[INNER_HEADER:%.*]]
; CHECK:       inner.header:
; CHECK-NEXT:    [[INNER_IV:%.*]] = phi i64 [ 0, [[ROWS_BODY]] ], [ [[INNER_STEP:%.*]], [[INNER_LATCH:%.*]] ]
; CHECK-NEXT:    [[RESULT_VEC_0:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP11:%.*]], [[INNER_LATCH]] ]
; CHECK-NEXT:    [[RESULT_VEC_1:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP17:%.*]], [[INNER_LATCH]] ]
; CHECK-NEXT:    br label [[INNER_BODY:%.*]]
; CHECK:       inner.body:
; CHECK-NEXT:    [[TMP0:%.*]] = mul i64 [[INNER_IV]], 2
; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], [[ROWS_IV]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[TMP1]]
; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x double>, ptr [[TMP2]], align 8
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[TMP2]], i64 2
; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[COLS_IV]], 2
; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[TMP3]], [[INNER_IV]]
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr double, ptr [[B:%.*]], i64 [[TMP4]]
; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x double>, ptr [[TMP5]], align 8
; CHECK-NEXT:    [[VEC_GEP3:%.*]] = getelementptr double, ptr [[TMP5]], i64 2
; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <2 x double>, ptr [[VEC_GEP3]], align 8
; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP6]], i64 0
; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK5]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[BLOCK]])
; CHECK-NEXT:    [[BLOCK6:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
; CHECK-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <2 x double> poison, double [[TMP8]], i64 0
; CHECK-NEXT:    [[SPLAT_SPLAT8:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT7]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP9:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK6]], <2 x double> [[SPLAT_SPLAT8]], <2 x double> [[TMP7]])
; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[TMP11]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> [[TMP10]], <2 x i32> <i32 2, i32 3>
; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <2 x double> poison, double [[TMP12]], i64 0
; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT11]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP13:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK10]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[BLOCK9]])
; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <2 x double> poison, double [[TMP14]], i64 0
; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT14]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP15:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK13]], <2 x double> [[SPLAT_SPLAT15]], <2 x double> [[TMP13]])
; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <2 x double> [[TMP15]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[TMP17]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> [[TMP16]], <2 x i32> <i32 2, i32 3>
; CHECK-NEXT:    br label [[INNER_LATCH]]
; CHECK:       inner.latch:
; CHECK-NEXT:    [[INNER_STEP]] = add i64 [[INNER_IV]], 2
; CHECK-NEXT:    [[INNER_COND:%.*]] = icmp ne i64 [[INNER_STEP]], 2
; CHECK-NEXT:    br i1 [[INNER_COND]], label [[INNER_HEADER]], label [[ROWS_LATCH]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK:       rows.latch:
; CHECK-NEXT:    [[ROWS_STEP]] = add i64 [[ROWS_IV]], 2
; CHECK-NEXT:    [[ROWS_COND:%.*]] = icmp ne i64 [[ROWS_STEP]], 2
; CHECK-NEXT:    [[TMP18:%.*]] = mul i64 [[COLS_IV]], 2
; CHECK-NEXT:    [[TMP19:%.*]] = add i64 [[TMP18]], [[ROWS_IV]]
; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr double, ptr [[C:%.*]], i64 [[TMP19]]
; CHECK-NEXT:    store <2 x double> [[TMP11]], ptr [[TMP20]], align 8
; CHECK-NEXT:    [[VEC_GEP16:%.*]] = getelementptr double, ptr [[TMP20]], i64 2
; CHECK-NEXT:    store <2 x double> [[TMP17]], ptr [[VEC_GEP16]], align 8
; CHECK-NEXT:    br i1 [[ROWS_COND]], label [[ROWS_HEADER]], label [[COLS_LATCH]]
; CHECK:       cols.latch:
; CHECK-NEXT:    [[COLS_STEP]] = add i64 [[COLS_IV]], 2
; CHECK-NEXT:    [[COLS_COND:%.*]] = icmp ne i64 [[COLS_STEP]], 2
; CHECK-NEXT:    br i1 [[COLS_COND]], label [[COLS_HEADER]], label [[CONTINUE:%.*]]
; CHECK:       continue:
; CHECK-NEXT:    ret void
;


entry:
  %a = load volatile <4 x double>, ptr %A, align 8
  %b = load <4 x double>, ptr %B, align 8

  %c = call <4 x double> @llvm.matrix.multiply(<4 x double> %a, <4 x double> %b, i32 2, i32 2, i32 2)

  store <4 x double> %c, ptr %C, align 8
  ret void
}

; Only the load of the second operand (%B) is volatile in the input.
; Expected output: plain tile loads and plain tile stores.
define void @multiply_load1_volatile(ptr noalias %A, ptr noalias %B, ptr noalias %C) {
; CHECK-LABEL: @multiply_load1_volatile(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[COLS_HEADER:%.*]]
; CHECK:       cols.header:
; CHECK-NEXT:    [[COLS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[COLS_STEP:%.*]], [[COLS_LATCH:%.*]] ]
; CHECK-NEXT:    br label [[COLS_BODY:%.*]]
; CHECK:       cols.body:
; CHECK-NEXT:    br label [[ROWS_HEADER:%.*]]
; CHECK:       rows.header:
; CHECK-NEXT:    [[ROWS_IV:%.*]] = phi i64 [ 0, [[COLS_BODY]] ], [ [[ROWS_STEP:%.*]], [[ROWS_LATCH:%.*]] ]
; CHECK-NEXT:    br label [[ROWS_BODY:%.*]]
; CHECK:       rows.body:
; CHECK-NEXT:    br label [[INNER_HEADER:%.*]]
; CHECK:       inner.header:
; CHECK-NEXT:    [[INNER_IV:%.*]] = phi i64 [ 0, [[ROWS_BODY]] ], [ [[INNER_STEP:%.*]], [[INNER_LATCH:%.*]] ]
; CHECK-NEXT:    [[RESULT_VEC_0:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP11:%.*]], [[INNER_LATCH]] ]
; CHECK-NEXT:    [[RESULT_VEC_1:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP17:%.*]], [[INNER_LATCH]] ]
; CHECK-NEXT:    br label [[INNER_BODY:%.*]]
; CHECK:       inner.body:
; CHECK-NEXT:    [[TMP0:%.*]] = mul i64 [[INNER_IV]], 2
; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], [[ROWS_IV]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[TMP1]]
; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x double>, ptr [[TMP2]], align 8
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[TMP2]], i64 2
; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[COLS_IV]], 2
; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[TMP3]], [[INNER_IV]]
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr double, ptr [[B:%.*]], i64 [[TMP4]]
; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x double>, ptr [[TMP5]], align 8
; CHECK-NEXT:    [[VEC_GEP3:%.*]] = getelementptr double, ptr [[TMP5]], i64 2
; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <2 x double>, ptr [[VEC_GEP3]], align 8
; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP6]], i64 0
; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK5]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[BLOCK]])
; CHECK-NEXT:    [[BLOCK6:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
; CHECK-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <2 x double> poison, double [[TMP8]], i64 0
; CHECK-NEXT:    [[SPLAT_SPLAT8:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT7]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP9:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK6]], <2 x double> [[SPLAT_SPLAT8]], <2 x double> [[TMP7]])
; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[TMP11]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> [[TMP10]], <2 x i32> <i32 2, i32 3>
; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <2 x double> poison, double [[TMP12]], i64 0
; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT11]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP13:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK10]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[BLOCK9]])
; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <2 x double> poison, double [[TMP14]], i64 0
; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT14]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP15:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK13]], <2 x double> [[SPLAT_SPLAT15]], <2 x double> [[TMP13]])
; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <2 x double> [[TMP15]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[TMP17]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> [[TMP16]], <2 x i32> <i32 2, i32 3>
; CHECK-NEXT:    br label [[INNER_LATCH]]
; CHECK:       inner.latch:
; CHECK-NEXT:    [[INNER_STEP]] = add i64 [[INNER_IV]], 2
; CHECK-NEXT:    [[INNER_COND:%.*]] = icmp ne i64 [[INNER_STEP]], 2
; CHECK-NEXT:    br i1 [[INNER_COND]], label [[INNER_HEADER]], label [[ROWS_LATCH]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK:       rows.latch:
; CHECK-NEXT:    [[ROWS_STEP]] = add i64 [[ROWS_IV]], 2
; CHECK-NEXT:    [[ROWS_COND:%.*]] = icmp ne i64 [[ROWS_STEP]], 2
; CHECK-NEXT:    [[TMP18:%.*]] = mul i64 [[COLS_IV]], 2
; CHECK-NEXT:    [[TMP19:%.*]] = add i64 [[TMP18]], [[ROWS_IV]]
; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr double, ptr [[C:%.*]], i64 [[TMP19]]
; CHECK-NEXT:    store <2 x double> [[TMP11]], ptr [[TMP20]], align 8
; CHECK-NEXT:    [[VEC_GEP16:%.*]] = getelementptr double, ptr [[TMP20]], i64 2
; CHECK-NEXT:    store <2 x double> [[TMP17]], ptr [[VEC_GEP16]], align 8
; CHECK-NEXT:    br i1 [[ROWS_COND]], label [[ROWS_HEADER]], label [[COLS_LATCH]]
; CHECK:       cols.latch:
; CHECK-NEXT:    [[COLS_STEP]] = add i64 [[COLS_IV]], 2
; CHECK-NEXT:    [[COLS_COND:%.*]] = icmp ne i64 [[COLS_STEP]], 2
; CHECK-NEXT:    br i1 [[COLS_COND]], label [[COLS_HEADER]], label [[CONTINUE:%.*]]
; CHECK:       continue:
; CHECK-NEXT:    ret void
;


entry:
  %a = load <4 x double>, ptr %A, align 8
  %b = load volatile <4 x double>, ptr %B, align 8

  %c = call <4 x double> @llvm.matrix.multiply(<4 x double> %a, <4 x double> %b, i32 2, i32 2, i32 2)

  store <4 x double> %c, ptr %C, align 8
  ret void
}

; Only the store of the result (%C) is volatile in the input.
; Expected output: plain tile loads, volatile tile stores.
define void @multiply_store_volatile(ptr noalias %A, ptr noalias %B, ptr noalias %C) {
; CHECK-LABEL: @multiply_store_volatile(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[COLS_HEADER:%.*]]
; CHECK:       cols.header:
; CHECK-NEXT:    [[COLS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[COLS_STEP:%.*]], [[COLS_LATCH:%.*]] ]
; CHECK-NEXT:    br label [[COLS_BODY:%.*]]
; CHECK:       cols.body:
; CHECK-NEXT:    br label [[ROWS_HEADER:%.*]]
; CHECK:       rows.header:
; CHECK-NEXT:    [[ROWS_IV:%.*]] = phi i64 [ 0, [[COLS_BODY]] ], [ [[ROWS_STEP:%.*]], [[ROWS_LATCH:%.*]] ]
; CHECK-NEXT:    br label [[ROWS_BODY:%.*]]
; CHECK:       rows.body:
; CHECK-NEXT:    br label [[INNER_HEADER:%.*]]
; CHECK:       inner.header:
; CHECK-NEXT:    [[INNER_IV:%.*]] = phi i64 [ 0, [[ROWS_BODY]] ], [ [[INNER_STEP:%.*]], [[INNER_LATCH:%.*]] ]
; CHECK-NEXT:    [[RESULT_VEC_0:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP11:%.*]], [[INNER_LATCH]] ]
; CHECK-NEXT:    [[RESULT_VEC_1:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP17:%.*]], [[INNER_LATCH]] ]
; CHECK-NEXT:    br label [[INNER_BODY:%.*]]
; CHECK:       inner.body:
; CHECK-NEXT:    [[TMP0:%.*]] = mul i64 [[INNER_IV]], 2
; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], [[ROWS_IV]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[TMP1]]
; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x double>, ptr [[TMP2]], align 8
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[TMP2]], i64 2
; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x double>, ptr [[VEC_GEP]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[COLS_IV]], 2
; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[TMP3]], [[INNER_IV]]
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr double, ptr [[B:%.*]], i64 [[TMP4]]
; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x double>, ptr [[TMP5]], align 8
; CHECK-NEXT:    [[VEC_GEP3:%.*]] = getelementptr double, ptr [[TMP5]], i64 2
; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <2 x double>, ptr [[VEC_GEP3]], align 8
; CHECK-NEXT:    [[BLOCK:%.*]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[BLOCK5:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 0
; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP6]], i64 0
; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK5]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[BLOCK]])
; CHECK-NEXT:    [[BLOCK6:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[COL_LOAD2]], i64 1
; CHECK-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <2 x double> poison, double [[TMP8]], i64 0
; CHECK-NEXT:    [[SPLAT_SPLAT8:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT7]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP9:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK6]], <2 x double> [[SPLAT_SPLAT8]], <2 x double> [[TMP7]])
; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[TMP11]] = shufflevector <2 x double> [[RESULT_VEC_0]], <2 x double> [[TMP10]], <2 x i32> <i32 2, i32 3>
; CHECK-NEXT:    [[BLOCK9:%.*]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[BLOCK10:%.*]] = shufflevector <2 x double> [[COL_LOAD]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 0
; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <2 x double> poison, double [[TMP12]], i64 0
; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT11]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP13:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK10]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[BLOCK9]])
; CHECK-NEXT:    [[BLOCK13:%.*]] = shufflevector <2 x double> [[COL_LOAD1]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[COL_LOAD4]], i64 1
; CHECK-NEXT:    [[SPLAT_SPLATINSERT14:%.*]] = insertelement <2 x double> poison, double [[TMP14]], i64 0
; CHECK-NEXT:    [[SPLAT_SPLAT15:%.*]] = shufflevector <2 x double> [[SPLAT_SPLATINSERT14]], <2 x double> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP15:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[BLOCK13]], <2 x double> [[SPLAT_SPLAT15]], <2 x double> [[TMP13]])
; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <2 x double> [[TMP15]], <2 x double> poison, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[TMP17]] = shufflevector <2 x double> [[RESULT_VEC_1]], <2 x double> [[TMP16]], <2 x i32> <i32 2, i32 3>
; CHECK-NEXT:    br label [[INNER_LATCH]]
; CHECK:       inner.latch:
; CHECK-NEXT:    [[INNER_STEP]] = add i64 [[INNER_IV]], 2
; CHECK-NEXT:    [[INNER_COND:%.*]] = icmp ne i64 [[INNER_STEP]], 2
; CHECK-NEXT:    br i1 [[INNER_COND]], label [[INNER_HEADER]], label [[ROWS_LATCH]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       rows.latch:
; CHECK-NEXT:    [[ROWS_STEP]] = add i64 [[ROWS_IV]], 2
; CHECK-NEXT:    [[ROWS_COND:%.*]] = icmp ne i64 [[ROWS_STEP]], 2
; CHECK-NEXT:    [[TMP18:%.*]] = mul i64 [[COLS_IV]], 2
; CHECK-NEXT:    [[TMP19:%.*]] = add i64 [[TMP18]], [[ROWS_IV]]
; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr double, ptr [[C:%.*]], i64 [[TMP19]]
; CHECK-NEXT:    store volatile <2 x double> [[TMP11]], ptr [[TMP20]], align 8
; CHECK-NEXT:    [[VEC_GEP16:%.*]] = getelementptr double, ptr [[TMP20]], i64 2
; CHECK-NEXT:    store volatile <2 x double> [[TMP17]], ptr [[VEC_GEP16]], align 8
; CHECK-NEXT:    br i1 [[ROWS_COND]], label [[ROWS_HEADER]], label [[COLS_LATCH]]
; CHECK:       cols.latch:
; CHECK-NEXT:    [[COLS_STEP]] = add i64 [[COLS_IV]], 2
; CHECK-NEXT:    [[COLS_COND:%.*]] = icmp ne i64 [[COLS_STEP]], 2
; CHECK-NEXT:    br i1 [[COLS_COND]], label [[COLS_HEADER]], label [[CONTINUE:%.*]]
; CHECK:       continue:
; CHECK-NEXT:    ret void
;

entry:
  %a = load <4 x double>, ptr %A, align 8
  %b = load <4 x double>, ptr %B, align 8

  %c = call <4 x double> @llvm.matrix.multiply(<4 x double> %a, <4 x double> %b, i32 2, i32 2, i32 2)

  store volatile <4 x double> %c, ptr %C, align 8
  ret void
}

; Matrix multiply intrinsic used by every test above: two <4 x double>
; operands followed by three i32 dimension arguments.
declare <4 x double> @llvm.matrix.multiply(<4 x double>, <4 x double>, i32, i32, i32)