; RUN: opt -passes='lower-matrix-intrinsics' -S < %s | FileCheck %s

; The volatile flag of the matrix load intrinsic must be preserved on the
; lowered per-column loads.
define <9 x double> @strided_load_3x3_volatile(ptr %in, i64 %stride) {
; CHECK-LABEL: @strided_load_3x3_volatile(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VEC_START:%.*]] = mul i64 0, [[STRIDE:%.*]]
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr %in, i64 [[VEC_START]]
; CHECK-NEXT:    load volatile <3 x double>, ptr [[VEC_GEP]], align 8
; CHECK-NEXT:    [[VEC_START1:%.*]] = mul i64 1, [[STRIDE]]
; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr double, ptr %in, i64 [[VEC_START1]]
; CHECK-NEXT:    load volatile <3 x double>, ptr [[VEC_GEP2]], align 8
; CHECK-NEXT:    [[VEC_START5:%.*]] = mul i64 2, [[STRIDE]]
; CHECK-NEXT:    [[VEC_GEP6:%.*]] = getelementptr double, ptr %in, i64 [[VEC_START5]]
; CHECK-NEXT:    load volatile <3 x double>, ptr [[VEC_GEP6]], align 8
; CHECK-NOT:     = load
;
entry:
  %load = call <9 x double> @llvm.matrix.column.major.load.v9f64(ptr %in, i64 %stride, i1 true, i32 3, i32 3)
  ret <9 x double> %load
}

declare <9 x double> @llvm.matrix.column.major.load.v9f64(ptr, i64, i1, i32, i32)

; A volatile vector load feeding a matrix multiply is split into volatile
; column loads.
define <4 x double> @load_volatile_multiply(ptr %in) {
; CHECK-LABEL: @load_volatile_multiply(
; CHECK-NEXT:    load volatile <2 x double>, ptr [[IN:%.*]], align 8
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[IN]], i64 2
; CHECK-NEXT:    load volatile <2 x double>, ptr [[VEC_GEP]], align 8
; CHECK-NOT:     = load
;
  %in.m = load volatile <4 x double>, ptr %in, align 8
  %res = call <4 x double> @llvm.matrix.multiply(<4 x double> %in.m, <4 x double> %in.m, i32 2, i32 2, i32 2)
  ret <4 x double> %res
}

declare <4 x double> @llvm.matrix.multiply(<4 x double>, <4 x double>, i32, i32, i32)

; With an unknown stride, only the first column keeps the explicit align 32;
; the remaining columns are only known to be 8-byte aligned.
define <9 x double> @strided_load_3x3_align32(ptr %in, i64 %stride) {
; CHECK-LABEL: @strided_load_3x3_align32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VEC_START:%.*]] = mul i64 0, [[STRIDE:%.*]]
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr %in, i64 [[VEC_START]]
; CHECK-NEXT:    load <3 x double>, ptr [[VEC_GEP]], align 32
; CHECK-NEXT:    [[VEC_START1:%.*]] = mul i64 1, [[STRIDE]]
; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr double, ptr %in, i64 [[VEC_START1]]
; CHECK-NEXT:    load <3 x double>, ptr [[VEC_GEP2]], align 8
; CHECK-NEXT:    [[VEC_START5:%.*]] = mul i64 2, [[STRIDE]]
; CHECK-NEXT:    [[VEC_GEP6:%.*]] = getelementptr double, ptr %in, i64 [[VEC_START5]]
; CHECK-NEXT:    load <3 x double>, ptr [[VEC_GEP6]], align 8
; CHECK-NOT:     = load
;
entry:
  %load = call <9 x double> @llvm.matrix.column.major.load.v9f64(ptr align 32 %in, i64 %stride, i1 false, i32 3, i32 3)
  ret <9 x double> %load
}

; An alignment smaller than the element alignment is propagated to every
; column load.
define <9 x double> @strided_load_3x3_align2(ptr %in, i64 %stride) {
; CHECK-LABEL: @strided_load_3x3_align2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VEC_START:%.*]] = mul i64 0, [[STRIDE:%.*]]
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr %in, i64 [[VEC_START]]
; CHECK-NEXT:    load <3 x double>, ptr [[VEC_GEP]], align 2
; CHECK-NEXT:    [[VEC_START1:%.*]] = mul i64 1, [[STRIDE]]
; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr double, ptr %in, i64 [[VEC_START1]]
; CHECK-NEXT:    load <3 x double>, ptr [[VEC_GEP2]], align 2
; CHECK-NEXT:    [[VEC_START5:%.*]] = mul i64 2, [[STRIDE]]
; CHECK-NEXT:    [[VEC_GEP6:%.*]] = getelementptr double, ptr %in, i64 [[VEC_START5]]
; CHECK-NEXT:    load <3 x double>, ptr [[VEC_GEP6]], align 2
; CHECK-NOT:     = load
;
entry:
  %load = call <9 x double> @llvm.matrix.column.major.load.v9f64(ptr align 2 %in, i64 %stride, i1 false, i32 3, i32 3)
  ret <9 x double> %load
}

define <4 x double> @load_align2_multiply(ptr %in) {
; CHECK-LABEL: @load_align2_multiply(
; CHECK-NEXT:    load <2 x double>, ptr [[IN:%.*]], align 2
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, ptr [[IN]], i64 2
; CHECK-NEXT:    load <2 x double>, ptr [[VEC_GEP]], align 2
; CHECK-NOT:     = load
;
  %in.m = load <4 x double>, ptr %in, align 2
  %res = call <4 x double> @llvm.matrix.multiply(<4 x double> %in.m, <4 x double> %in.m, i32 2, i32 2, i32 2)
  ret <4 x double> %res
}

; With a constant stride, the alignment of each column load is derived from
; the start alignment and the column's byte offset (0, 8 and 16 bytes here).
define <6 x float> @strided_load_2x3_align16_stride2(ptr %in) {
; CHECK-LABEL: @strided_load_2x3_align16_stride2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x float>, ptr %in, align 16
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, ptr %in, i64 2
; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x float>, ptr [[VEC_GEP]], align 8
; CHECK-NEXT:    [[VEC_GEP3:%.*]] = getelementptr float, ptr %in, i64 4
; CHECK-NEXT:    [[COL_LOAD5:%.*]] = load <2 x float>, ptr [[VEC_GEP3]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x float> [[COL_LOAD]], <2 x float> [[COL_LOAD2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[COL_LOAD5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT:    ret <6 x float> [[TMP3]]
;
entry:
  %load = call <6 x float> @llvm.matrix.column.major.load.v6f32(ptr align 16 %in, i64 2, i1 false, i32 2, i32 3)
  ret <6 x float> %load
}

declare <6 x float> @llvm.matrix.column.major.load.v6f32(ptr, i64, i1, i32, i32)