; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -S -passes=inline | FileCheck %s

; Test for PR52660.

; This call should not get inlined, because it would make the callee_not_avx
; call ABI incompatible.
define void @caller_avx() "target-features"="+avx" {
; CHECK-LABEL: define {{[^@]+}}@caller_avx
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:    call void @caller_not_avx()
; CHECK-NEXT:    ret void
;
  call void @caller_not_avx()
  ret void
}

define internal void @caller_not_avx() {
; CHECK-LABEL: define {{[^@]+}}@caller_not_avx() {
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @callee_not_avx(<4 x i64> <i64 0, i64 1, i64 2, i64 3>)
; CHECK-NEXT:    ret void
;
  call i64 @callee_not_avx(<4 x i64> <i64 0, i64 1, i64 2, i64 3>)
  ret void
}

define i64 @callee_not_avx(<4 x i64> %arg) noinline {
; CHECK-LABEL: define {{[^@]+}}@callee_not_avx
; CHECK-SAME: (<4 x i64> [[ARG:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT:    [[V:%.*]] = extractelement <4 x i64> [[ARG]], i64 2
; CHECK-NEXT:    ret i64 [[V]]
;
  %v = extractelement <4 x i64> %arg, i64 2
  ret i64 %v
}

; This call also shouldn't be inlined, as we don't know whether callee_unknown
; is ABI compatible or not.
define void @caller_avx2() "target-features"="+avx" {
; CHECK-LABEL: define {{[^@]+}}@caller_avx2
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT:    call void @caller_not_avx2()
; CHECK-NEXT:    ret void
;
  call void @caller_not_avx2()
  ret void
}

define internal void @caller_not_avx2() {
; CHECK-LABEL: define {{[^@]+}}@caller_not_avx2() {
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @callee_unknown(<4 x i64> <i64 0, i64 1, i64 2, i64 3>)
; CHECK-NEXT:    ret void
;
  call i64 @callee_unknown(<4 x i64> <i64 0, i64 1, i64 2, i64 3>)
  ret void
}

declare i64 @callee_unknown(<4 x i64>)

; This call should get inlined, because we assume that intrinsics are always
; ABI compatible.
define void @caller_avx3() "target-features"="+avx" {
; CHECK-LABEL: define {{[^@]+}}@caller_avx3
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.some_intrinsic(<4 x i64> <i64 0, i64 1, i64 2, i64 3>)
; CHECK-NEXT:    ret void
;
  call void @caller_not_avx3()
  ret void
}

define internal void @caller_not_avx3() {
  call i64 @llvm.some_intrinsic(<4 x i64> <i64 0, i64 1, i64 2, i64 3>)
  ret void
}

declare i64 @llvm.some_intrinsic(<4 x i64>)

; This call should get inlined, because only simple types are involved.
define void @caller_avx4() "target-features"="+avx" {
; CHECK-LABEL: define {{[^@]+}}@caller_avx4
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @caller_unknown_simple(i64 0)
; CHECK-NEXT:    ret void
;
  call void @caller_not_avx4()
  ret void
}

define internal void @caller_not_avx4() {
  call i64 @caller_unknown_simple(i64 0)
  ret void
}

declare i64 @caller_unknown_simple(i64)

; This call should get inlined, because the callee only contains
; inline ASM, not real calls.
define <8 x i64> @caller_inline_asm(ptr %p0, i64 %k, ptr %p1, ptr %p2) #0 {
; CHECK-LABEL: define {{[^@]+}}@caller_inline_asm
; CHECK-SAME: (ptr [[P0:%.*]], i64 [[K:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT:    [[SRC_I:%.*]] = load <8 x i64>, ptr [[P0]], align 64
; CHECK-NEXT:    [[A_I:%.*]] = load <8 x i64>, ptr [[P1]], align 64
; CHECK-NEXT:    [[B_I:%.*]] = load <8 x i64>, ptr [[P2]], align 64
; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i64> asm "vpaddb\09$($3, $2, $0 {$1}", "=v,^Yk,v,v,0,~{dirflag},~{fpsr},~{flags}"(i64 [[K]], <8 x i64> [[A_I]], <8 x i64> [[B_I]], <8 x i64> [[SRC_I]])
; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
;
  %call = call <8 x i64> @callee_inline_asm(ptr %p0, i64 %k, ptr %p1, ptr %p2)
  ret <8 x i64> %call
}

define internal <8 x i64> @callee_inline_asm(ptr %p0, i64 %k, ptr %p1, ptr %p2) #1 {
  %src = load <8 x i64>, ptr %p0, align 64
  %a = load <8 x i64>, ptr %p1, align 64
  %b = load <8 x i64>, ptr %p2, align 64
  %3 = tail call <8 x i64> asm "vpaddb\09$($3, $2, $0 {$1}", "=v,^Yk,v,v,0,~{dirflag},~{fpsr},~{flags}"(i64 %k, <8 x i64> %a, <8 x i64> %b, <8 x i64> %src) #2
  ret <8 x i64> %3
}

attributes #0 = { "min-legal-vector-width"="512" "target-features"="+avx,+avx2,+avx512bw,+avx512dq,+avx512f,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "tune-cpu"="generic" }
attributes #1 = { "min-legal-vector-width"="512" "target-features"="+avx,+avx2,+avx512bw,+avx512f,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "tune-cpu"="generic" }