; REQUIRES: aarch64-registered-target

; This test needs to be target specific due to the cost estimate in the output.

; Checks the remarks emitted by lower-matrix-intrinsics when one sub-expression
; is shared between two matrix expressions. In @test_2leafs the transpose of
; %shared.load (%tmp17) feeds both the 4x2 store (debug location 35:71) and the
; multiply whose result is stored as a 4x15 matrix (debug location 35:45), so
; two remarks are expected and each one refers to the other for the shared
; part. The expectations are verified twice: once against the remarks file
; written via -pass-remarks-output and once against the text printed by
; -pass-remarks.

; RUN: opt -passes=lower-matrix-intrinsics -pass-remarks-output=%t -pass-remarks=lower-matrix-intrinsics -mtriple=arm64-apple-iphoneos %s 2>&1 -disable-output | FileCheck --check-prefix=STDERR %s
; RUN: FileCheck --input-file=%t --check-prefix=YAML %s

; Remarks file, first remark: the 4x2 store at 35:71.
; YAML-LABEL: --- !Passed
; YAML-NEXT: Pass: lower-matrix-intrinsics
; YAML-NEXT: Name: matrix-lowered
; YAML-NEXT: DebugLoc: { File: test.cpp, Line: 35, Column: 71 }
; YAML-NEXT: Function: test_2leafs
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Lowered with '
; YAML-NEXT: - NumStores: '4'
; YAML-NEXT: - String: ' stores, '
; YAML-NEXT: - NumLoads: '0'
; YAML-NEXT: - String: ' loads, '
; YAML-NEXT: - NumComputeOps: '0'
; YAML-NEXT: - String: ' compute ops, '
; YAML-NEXT: - NumExposedTransposes: '0'
; YAML-NEXT: - String: ' exposed transposes'
; YAML-NEXT: - String: ",\nadditionally "
; YAML-NEXT: - NumStores: '0'
; YAML-NEXT: - String: ' stores, '
; YAML-NEXT: - NumLoads: '4'
; YAML-NEXT: - String: ' loads, '
; YAML-NEXT: - NumFPOps: '16'
; YAML-NEXT: - String: ' compute ops'
; YAML-NEXT: - String: ' are shared with other expressions'
; YAML-NEXT: - String: |
; YAML: column.major.store.4x2.double(
; YAML-NEXT: shared with remark at line 35 column 45 (transpose.2x4.double(column.major.load.2x4.double(addr %arg1,
; YAML-NEXT: scalar)),
; YAML-NEXT: addr %arg3,
; YAML-NEXT: 10)

; Remarks file, second remark: the 4x15 store at 35:45.
; YAML-LABEL: --- !Passed
; YAML-NEXT: Pass: lower-matrix-intrinsics
; YAML-NEXT: Name: matrix-lowered
; YAML-NEXT: DebugLoc: { File: test.cpp, Line: 35, Column: 45 }
; YAML-NEXT: Function: test_2leafs
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Lowered with '
; YAML-NEXT: - NumStores: '30'
; YAML-NEXT: - String: ' stores, '
; YAML-NEXT: - NumLoads: '45'
; YAML-NEXT: - String: ' loads, '
; YAML-NEXT: - NumComputeOps: '120'
; YAML-NEXT: - String: ' compute ops, '
; YAML-NEXT: - NumExposedTransposes: '0'
; YAML-NEXT: - String: ' exposed transposes'
; YAML-NEXT: - String: ",\nadditionally "
; YAML-NEXT: - NumStores: '0'
; YAML-NEXT: - String: ' stores, '
; YAML-NEXT: - NumLoads: '4'
; YAML-NEXT: - String: ' loads, '
; YAML-NEXT: - NumFPOps: '16'
; YAML-NEXT: - String: ' compute ops'
; YAML-NEXT: - String: ' are shared with other expressions'
; YAML-NEXT: - String: |
; YAML: column.major.store.4x15.double(
; YAML-NEXT: fsub(
; YAML-NEXT: column.major.load.4x15.double(addr %arg2, 20),
; YAML-NEXT: multiply.4x2.2x15.double(
; YAML-NEXT: shared with remark at line 35 column 71 (transpose.2x4.double(column.major.load.2x4.double(addr %arg1,
; YAML-NEXT: scalar)),
; YAML-NEXT: column.major.load.2x15.double(addr %arg3, scalar))),
; YAML-NEXT: addr %arg2,
; YAML-NEXT: 10)


; Remark text printed by -pass-remarks, same two remarks in the same order.
; STDERR-LABEL: remark: test.cpp:35:71: Lowered with 4 stores, 0 loads, 0 compute ops,
; STDERR-NEXT: additionally 0 stores, 4 loads, 16 compute ops are shared with other expressions
; STDERR-NEXT: column.major.store.4x2.double(
; STDERR-NEXT: shared with remark at line 35 column 45 (transpose.2x4.double(column.major.load.2x4.double(addr %arg1,
; STDERR-NEXT: scalar)),
; STDERR-NEXT: addr %arg3,
; STDERR-NEXT: 10)

; STDERR-LABEL: remark: test.cpp:35:45: Lowered with 30 stores, 45 loads, 120 compute ops,
; STDERR-NEXT: additionally 0 stores, 4 loads, 16 compute ops are shared with other expressions
; STDERR-NEXT: column.major.store.4x15.double(
; STDERR-NEXT: fsub(
; STDERR-NEXT: column.major.load.4x15.double(addr %arg2, 20),
; STDERR-NEXT: multiply.4x2.2x15.double(
; STDERR-NEXT: shared with remark at line 35 column 71 (transpose.2x4.double(column.major.load.2x4.double(addr %arg1,
; STDERR-NEXT: scalar)),
; STDERR-NEXT: column.major.load.2x15.double(addr %arg3, scalar))),
; STDERR-NEXT: addr %arg2,
; STDERR-NEXT: 10)

; Two expression roots (the two column-major stores) that both use %tmp17.
; The loads through %arg1 and %arg3 take the non-constant %stride argument; the
; expected remark text above shows "scalar" in the stride position for them.
define void @test_2leafs(ptr %arg1, ptr %arg2, ptr %arg3, i64 %stride) !dbg !8 {
bb:
  ; 2x4 load whose transpose is the sub-expression shared by both stores.
  %shared.load = tail call <8 x double> @llvm.matrix.column.major.load.v8f64.p0(ptr %arg1, i64 %stride, i1 false, i32 2, i32 4), !dbg !10
  ; 2x15 load, used only as the right-hand operand of the multiply.
  %shared.load.2 = tail call <30 x double> @llvm.matrix.column.major.load.v30f64.p0(ptr %arg3, i64 %stride, i1 false, i32 2, i32 15), !dbg !10
  %tmp17 = tail call <8 x double> @llvm.matrix.transpose.v8f64(<8 x double> %shared.load, i32 2, i32 4), !dbg !10
  ; First root (35:71): store the 4x2 transpose to %arg3 with stride 10.
  tail call void @llvm.matrix.column.major.store.v8f64.p0(<8 x double> %tmp17, ptr %arg3, i64 10, i1 false, i32 4, i32 2), !dbg !10
  %tmp18 = tail call <60 x double> @llvm.matrix.column.major.load.v60f64.p0(ptr %arg2, i64 20, i1 false, i32 4, i32 15), !dbg !11
  ; (4x2) * (2x15) multiply that reuses %tmp17.
  %tmp48 = tail call <60 x double> @llvm.matrix.multiply.v60f64.v8f64.v30f64(<8 x double> %tmp17, <30 x double> %shared.load.2, i32 4, i32 2, i32 15), !dbg !11
  %tmp49 = fsub <60 x double> %tmp18, %tmp48, !dbg !11
  ; Second root (35:45): store the 4x15 result to %arg2 with stride 10.
  tail call void @llvm.matrix.column.major.store.v60f64.p0(<60 x double> %tmp49, ptr %arg2, i64 10, i1 false, i32 4, i32 15), !dbg !11
  ret void
}

declare <8 x double> @llvm.matrix.transpose.v8f64(<8 x double>, i32 immarg, i32 immarg)
declare <8 x double> @llvm.matrix.column.major.load.v8f64.p0(ptr, i64, i1 immarg, i32 immarg, i32 immarg)
declare <30 x double> @llvm.matrix.column.major.load.v30f64.p0(ptr, i64, i1 immarg, i32 immarg, i32 immarg)
declare <60 x double> @llvm.matrix.column.major.load.v60f64.p0(ptr, i64, i1 immarg, i32 immarg, i32 immarg)
declare void @llvm.matrix.column.major.store.v60f64.p0(<60 x double>, ptr writeonly, i64, i1 immarg, i32 immarg, i32 immarg)
declare void @llvm.matrix.column.major.store.v8f64.p0(<8 x double>, ptr writeonly, i64, i1 immarg, i32 immarg, i32 immarg)
declare <60 x double> @llvm.matrix.multiply.v60f64.v8f64.v30f64(<8 x double>, <30 x double>, i32 immarg, i32 immarg, i32 immarg)

!llvm.module.flags = !{!0, !1, !2, !3}
!llvm.dbg.cu = !{!4}
!llvm.ident = !{!7}

!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 13, i32 0]}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = !{i32 1, !"wchar_size", i32 4}
!3 = !{i32 7, !"PIC Level", i32 2}
!4 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !5, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !6, nameTableKind: GNU)
!5 = !DIFile(filename: "test.cpp", directory: "")
!6 = !{}
!7 = !{!"clang"}
; !8 is the subprogram attached to @test_2leafs; !10 and !11 are the two debug
; locations (35:71 and 35:45) that the expected remarks are keyed on.
!8 = distinct !DISubprogram(name: "test", scope: !5, file: !5, line: 26, type: !9, scopeLine: 27, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
!9 = !DISubroutineType(types: !6)
!10 = distinct !DILocation(line: 35, column: 71, scope: !8)
!11 = distinct !DILocation(line: 35, column: 45, scope: !8)
; NOTE(review): no instruction in @test_2leafs references !12 through !47 in
; the portion of the file visible here; they look like leftovers and are kept
; unchanged.
!12 = !DILocation(line: 800, column: 17, scope: !13, inlinedAt: !15)
!13 = distinct !DISubprogram(name: "foo", scope: !14, file: !14, line: 789, type: !9, scopeLine: 790, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
!14 = !DIFile(filename: "bar.h", directory: "bar")
!15 = distinct !DILocation(line: 1280, column: 5, scope: !16, inlinedAt: !18)
!16 = distinct !DISubprogram(name: "zar", scope: !17, file: !17, line: 1275, type: !9, scopeLine: 1278, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
!17 = !DIFile(filename: "file1.h", directory: "dir1")
!18 = distinct !DILocation(line: 1278, column: 1, scope: !19, inlinedAt: !20)
!19 = distinct !DISubprogram(name: "yo", scope: !17, file: !17, line: 1275, type: !9, scopeLine: 1278, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
!20 = distinct !DILocation(line: 2514, column: 26, scope: !21, inlinedAt: !22)
!21 = distinct !DISubprogram(name: "zzzz", scope: !14, file: !14, line: 2505, type: !9, scopeLine: 2506, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
!22 = distinct !DILocation(line: 1263, column: 5, scope: !23, inlinedAt: !24)
!23 = distinct !DISubprogram(name: "ppppp", scope: !17, file: !17, line: 1258, type: !9, scopeLine: 1261, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
!24 = distinct !DILocation(line: 1261, column: 1, scope: !25, inlinedAt: !26)
!25 = distinct !DISubprogram(name: "qqqq", scope: !17, file: !17, line: 1258, type: !9, scopeLine: 1261, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
!26 = distinct !DILocation(line: 168, column: 7, scope: !27, inlinedAt: !29)
!27 = distinct !DISubprogram(name: "lll", scope: !28, file: !28, line: 166, type: !9, scopeLine: 169, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
!28 = !DIFile(filename: "file2.h", directory: "dir2")
!29 = distinct !DILocation(line: 169, column: 1, scope: !30, inlinedAt: !31)
!30 = distinct !DISubprogram(name: "Expr1", scope: !28, file: !28, line: 166, type: !9, scopeLine: 169, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
!31 = distinct !DILocation(line: 368, column: 12, scope: !32, inlinedAt: !33)
!32 = distinct !DISubprogram(name: "yyyyy", scope: !14, file: !14, line: 364, type: !9, scopeLine: 365, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
!33 = distinct !DILocation(line: 1297, column: 34, scope: !34, inlinedAt: !35)
!34 = distinct !DISubprogram(name: "eeeee", scope: !14, file: !14, line: 1290, type: !9, scopeLine: 1291, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
!35 = distinct !DILocation(line: 2306, column: 5, scope: !36, inlinedAt: !11)
!36 = distinct !DISubprogram(name: "aaaaa", scope: !37, file: !37, line: 2304, type: !9, scopeLine: 2305, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
!37 = !DIFile(filename: "foo.c", directory: "/")
!38 = distinct !DISubprogram(name: "test2", scope: !5, file: !5, line: 90, type: !9, scopeLine: 27, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
!39 = distinct !DILocation(line: 44, column: 44, scope: !38)
!40 = distinct !DILocation(line: 55, column: 55, scope: !38)
!41 = distinct !DILocation(line: 66, column: 66, scope: !38)
!42 = distinct !DISubprogram(name: "test2", scope: !5, file: !5, line: 90, type: !9, scopeLine: 27, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
!43 = distinct !DILocation(line: 77, column: 77, scope: !42)
!44 = distinct !DILocation(line: 88, column: 88, scope: !42)
!45 = distinct !DISubprogram(name: "test2", scope: !5, file: !5, line: 90, type: !9, scopeLine: 27, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, retainedNodes: !6)
!46 = distinct !DILocation(line: 99, column: 99, scope: !45)
!47 = distinct !DILocation(line: 111, column: 111, scope: !45)