; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -matrix-allow-contract=false -passes='lower-matrix-intrinsics' -S < %s | FileCheck %s


; Make sure we propagate in multiple iterations. First, we back-propagate the
; shape information from the transpose to %A, in the next iteration we
; forward-propagate it to %Mul, and then back to %B.
;
; What the assertions below pin down:
;   * %A is loaded as four <4 x double> columns (offsets 0, 4, 8, 12 doubles
;     from %A.Ptr), i.e. it received the 4 x 4 shape given to the transpose.
;   * The transpose itself is still lowered to an extractelement/insertelement
;     sequence even though its result has no users in this function.
;   * %B is loaded as four <4 x double> columns as well, and %Mul becomes four
;     column-wise <4 x double> fmuls whose results are concatenated back into
;     a <16 x double> with shufflevectors for the return.
define <16 x double> @backpropagation_iterations(ptr %A.Ptr, ptr %B.Ptr) {
; CHECK-LABEL: @backpropagation_iterations(
; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <4 x double>, ptr [[A_PTR:%.*]], align 8
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[A_PTR]], i64 4
; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <4 x double>, ptr [[VEC_GEP]], align 8
; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr double, ptr [[A_PTR]], i64 8
; CHECK-NEXT:    [[COL_LOAD3:%.*]] = load <4 x double>, ptr [[VEC_GEP2]], align 8
; CHECK-NEXT:    [[VEC_GEP4:%.*]] = getelementptr double, ptr [[A_PTR]], i64 12
; CHECK-NEXT:    [[COL_LOAD5:%.*]] = load <4 x double>, ptr [[VEC_GEP4]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x double> [[COL_LOAD]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x double> poison, double [[TMP1]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x double> [[COL_LOAD1]], i64 0
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x double> [[TMP2]], double [[TMP3]], i64 1
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x double> [[COL_LOAD3]], i64 0
; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x double> [[TMP4]], double [[TMP5]], i64 2
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x double> [[COL_LOAD5]], i64 0
; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x double> [[TMP6]], double [[TMP7]], i64 3
; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x double> [[COL_LOAD]], i64 1
; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x double> poison, double [[TMP9]], i64 0
; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x double> [[COL_LOAD1]], i64 1
; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x double> [[TMP10]], double [[TMP11]], i64 1
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <4 x double> [[COL_LOAD3]], i64 1
; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x double> [[TMP12]], double [[TMP13]], i64 2
; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x double> [[COL_LOAD5]], i64 1
; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x double> [[TMP14]], double [[TMP15]], i64 3
; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <4 x double> [[COL_LOAD]], i64 2
; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <4 x double> poison, double [[TMP17]], i64 0
; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <4 x double> [[COL_LOAD1]], i64 2
; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <4 x double> [[TMP18]], double [[TMP19]], i64 1
; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <4 x double> [[COL_LOAD3]], i64 2
; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x double> [[TMP20]], double [[TMP21]], i64 2
; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <4 x double> [[COL_LOAD5]], i64 2
; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x double> [[TMP22]], double [[TMP23]], i64 3
; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <4 x double> [[COL_LOAD]], i64 3
; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <4 x double> poison, double [[TMP25]], i64 0
; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <4 x double> [[COL_LOAD1]], i64 3
; CHECK-NEXT:    [[TMP28:%.*]] = insertelement <4 x double> [[TMP26]], double [[TMP27]], i64 1
; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <4 x double> [[COL_LOAD3]], i64 3
; CHECK-NEXT:    [[TMP30:%.*]] = insertelement <4 x double> [[TMP28]], double [[TMP29]], i64 2
; CHECK-NEXT:    [[TMP31:%.*]] = extractelement <4 x double> [[COL_LOAD5]], i64 3
; CHECK-NEXT:    [[TMP32:%.*]] = insertelement <4 x double> [[TMP30]], double [[TMP31]], i64 3
; CHECK-NEXT:    [[COL_LOAD6:%.*]] = load <4 x double>, ptr [[B_PTR:%.*]], align 8
; CHECK-NEXT:    [[VEC_GEP7:%.*]] = getelementptr double, ptr [[B_PTR]], i64 4
; CHECK-NEXT:    [[COL_LOAD8:%.*]] = load <4 x double>, ptr [[VEC_GEP7]], align 8
; CHECK-NEXT:    [[VEC_GEP9:%.*]] = getelementptr double, ptr [[B_PTR]], i64 8
; CHECK-NEXT:    [[COL_LOAD10:%.*]] = load <4 x double>, ptr [[VEC_GEP9]], align 8
; CHECK-NEXT:    [[VEC_GEP11:%.*]] = getelementptr double, ptr [[B_PTR]], i64 12
; CHECK-NEXT:    [[COL_LOAD12:%.*]] = load <4 x double>, ptr [[VEC_GEP11]], align 8
; CHECK-NEXT:    [[TMP33:%.*]] = fmul <4 x double> [[COL_LOAD]], [[COL_LOAD6]]
; CHECK-NEXT:    [[TMP34:%.*]] = fmul <4 x double> [[COL_LOAD1]], [[COL_LOAD8]]
; CHECK-NEXT:    [[TMP35:%.*]] = fmul <4 x double> [[COL_LOAD3]], [[COL_LOAD10]]
; CHECK-NEXT:    [[TMP36:%.*]] = fmul <4 x double> [[COL_LOAD5]], [[COL_LOAD12]]
; CHECK-NEXT:    [[TMP37:%.*]] = shufflevector <4 x double> [[TMP33]], <4 x double> [[TMP34]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP38:%.*]] = shufflevector <4 x double> [[TMP35]], <4 x double> [[TMP36]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP39:%.*]] = shufflevector <8 x double> [[TMP37]], <8 x double> [[TMP38]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    ret <16 x double> [[TMP39]]
;
  %A = load <16 x double>, ptr %A.Ptr, align 8
  ; %A.trans is intentionally left unused: the transpose call is the only
  ; instruction in this function that carries explicit dimensions (4 x 4).
  %A.trans = tail call <16 x double> @llvm.matrix.transpose.v16f64(<16 x double> %A, i32 4, i32 4)
  %B = load <16 x double>, ptr %B.Ptr, align 8
  %Mul = fmul <16 x double> %A, %B
  ret <16 x double> %Mul
}

declare <16 x double> @llvm.matrix.multiply.v16f64.v16f64.v16f64(<16 x double>, <16 x double>, i32 immarg, i32 immarg, i32 immarg)
declare <16 x double> @llvm.matrix.transpose.v16f64(<16 x double>, i32 immarg, i32 immarg)