; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=z13 \
; RUN:   | FileCheck %s -check-prefixes=CHECK,Z13
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=z15 \
; RUN:   | FileCheck %s -check-prefixes=CHECK,Z15
;
; Cost-model expectations for the llvm.bswap intrinsic on SystemZ. The same
; input is analysed once with -mcpu=z13 and once with -mcpu=z15.
; Each function header is matched as a label, so the expectations that follow
; it can only match inside the output printed for that function.

; A byte swap of a single i128 value is expected to cost 1.
define void @bswap_i128(i128 %arg) {
; CHECK-LABEL: function 'bswap_i128'
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp = tail call i128 @llvm.bswap.i128(i128 %arg)
  %swp = tail call i128 @llvm.bswap.i128(i128 %arg)
  ret void
}

; 64-bit elements: the scalar and the <2 x i64> (128-bit) swap are expected to
; cost 1, the <4 x i64> (256-bit) swap is expected to cost 2.
define void @bswap_i64(i64 %arg, <2 x i64> %arg2) {
; CHECK-LABEL: function 'bswap_i64'
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call i64
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call <2 x i64>
; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %swp4 = tail call <4 x i64>
  %swp1 = tail call i64 @llvm.bswap.i64(i64 %arg)
  %swp2 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %arg2)
  %swp4 = tail call <4 x i64> @llvm.bswap.v4i64(<4 x i64> undef)
  ret void
}

; 32-bit elements: scalar, <2 x i32> and <4 x i32> are expected to cost 1, the
; <8 x i32> (256-bit) swap is expected to cost 2.
define void @bswap_i32(i32 %arg, <2 x i32> %arg2, <4 x i32> %arg4) {
; CHECK-LABEL: function 'bswap_i32'
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call i32
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call <2 x i32>
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp4 = tail call <4 x i32>
; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %swp8 = tail call <8 x i32>
  %swp1 = tail call i32 @llvm.bswap.i32(i32 %arg)
  %swp2 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %arg2)
  %swp4 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %arg4)
  %swp8 = tail call <8 x i32> @llvm.bswap.v8i32(<8 x i32> undef)
  ret void
}

; 16-bit elements: scalar and vectors up to <8 x i16> are expected to cost 1,
; the <16 x i16> (256-bit) swap is expected to cost 2.
define void @bswap_i16(i16 %arg, <2 x i16> %arg2, <4 x i16> %arg4,
                       <8 x i16> %arg8) {
; CHECK-LABEL: function 'bswap_i16'
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call i16 @llvm.bswap.i16(i16 %arg)
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %arg2)
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp4 = tail call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %arg4)
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp8 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %arg8)
; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %swp16 = tail call <16 x i16> @llvm.bswap.v16i16(<16 x i16> undef)
  %swp1 = tail call i16 @llvm.bswap.i16(i16 %arg)
  %swp2 = tail call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %arg2)
  %swp4 = tail call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %arg4)
  %swp8 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %arg8)
  %swp16 = tail call <16 x i16> @llvm.bswap.v16i16(<16 x i16> undef)
  ret void
}

; Test that load reversed / store reversed are reflected in the costs.
; Scalar i64 byte swaps with memory operands. Three shapes are exercised:
;   1. load -> bswap           (the load is expected to cost 0)
;   2. bswap -> store          (the store is expected to cost 0)
;   3. load -> bswap -> store  (the load keeps cost 1, the store costs 0)
; The bswap itself is expected to cost 1 in every shape.
; NOTE(review): presumably the zero-cost memory access is the one that gets
; folded into a byte-reversing load/store; confirm against the SystemZ TTI.
define void @bswap_i64_mem(ptr %src, i64 %arg, ptr %dst) {
; CHECK-LABEL: function 'bswap_i64_mem'
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load i64, ptr %src
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call i64 @llvm.bswap.i64(i64 %Ld1)
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call i64 @llvm.bswap.i64(i64 %arg)
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i64 %swp2, ptr %dst
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load i64, ptr %src
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call i64 @llvm.bswap.i64(i64 %Ld2)
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i64 %swp3, ptr %dst
  %Ld1 = load i64, ptr %src
  %swp1 = tail call i64 @llvm.bswap.i64(i64 %Ld1)

  %swp2 = tail call i64 @llvm.bswap.i64(i64 %arg)
  store i64 %swp2, ptr %dst

  %Ld2 = load i64, ptr %src
  %swp3 = tail call i64 @llvm.bswap.i64(i64 %Ld2)
  store i64 %swp3, ptr %dst

  ret void
}

; <2 x i64> variant of the three memory shapes. The expectations differ per
; CPU: with z13 the first load and both stores cost 1, with z15 they cost 0.
; The second load and all bswap calls are expected to cost 1 on both CPUs.
define void @bswap_v2i64_mem(ptr %src, <2 x i64> %arg, ptr %dst) {
; CHECK-LABEL: function 'bswap_v2i64_mem'
; Z13: Cost Model: Found an estimated cost of 1 for instruction: %Ld1 = load <2 x i64>, ptr %src
; Z15: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load <2 x i64>, ptr %src
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %Ld1)
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %arg)
; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %swp2, ptr %dst
; Z15: Cost Model: Found an estimated cost of 0 for instruction: store <2 x i64> %swp2, ptr %dst
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load <2 x i64>, ptr %src
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %Ld2)
; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %swp3, ptr %dst
; Z15: Cost Model: Found an estimated cost of 0 for instruction: store <2 x i64> %swp3, ptr %dst

  %Ld1 = load <2 x i64>, ptr %src
  %swp1 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %Ld1)

  %swp2 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %arg)
  store <2 x i64> %swp2, ptr %dst

  %Ld2 = load <2 x i64>, ptr %src
  %swp3 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %Ld2)
  store <2 x i64> %swp3, ptr %dst

  ret void
}

; Scalar i32 variant of the three memory shapes; the expected costs follow the
; same pattern as the scalar i64 case (first load 0, stores 0, second load 1).
define void @bswap_i32_mem(ptr %src, i32 %arg, ptr %dst) {
; CHECK-LABEL: function 'bswap_i32_mem'
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load i32, ptr %src
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call i32 @llvm.bswap.i32(i32 %Ld1)
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call i32 @llvm.bswap.i32(i32 %arg)
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i32 %swp2, ptr %dst
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load i32, ptr %src
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call i32 @llvm.bswap.i32(i32 %Ld2)
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i32 %swp3, ptr %dst
  %Ld1 = load i32, ptr %src
  %swp1 = tail call i32 @llvm.bswap.i32(i32 %Ld1)

  %swp2 = tail call i32 @llvm.bswap.i32(i32 %arg)
  store i32 %swp2, ptr %dst

  %Ld2 = load i32, ptr %src
  %swp3 = tail call i32 @llvm.bswap.i32(i32 %Ld2)
  store i32 %swp3, ptr %dst

  ret void
}

; <4 x i32> variant of the three memory shapes, with the same per-CPU split as
; the <2 x i64> case (first load and stores: 1 with z13, 0 with z15).
define void @bswap_v4i32_mem(ptr %src, <4 x i32> %arg, ptr %dst) {
; CHECK-LABEL: function 'bswap_v4i32_mem'
; Z13: Cost Model: Found an estimated cost of 1 for instruction: %Ld1 = load <4 x i32>, ptr %src
; Z15: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load <4 x i32>, ptr %src
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %Ld1)
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %arg)
; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %swp2, ptr %dst
; Z15: Cost Model: Found an estimated cost of 0 for instruction: store <4 x i32> %swp2, ptr %dst
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load <4 x i32>, ptr %src
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %Ld2)
; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %swp3, ptr %dst
; Z15: Cost Model: Found an estimated cost of 0 for instruction: store <4 x i32> %swp3, ptr %dst
  %Ld1 = load <4 x i32>, ptr %src
  %swp1 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %Ld1)

  %swp2 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %arg)
  store <4 x i32> %swp2, ptr %dst

  %Ld2 = load <4 x i32>, ptr %src
  %swp3 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %Ld2)
  store <4 x i32> %swp3, ptr %dst

  ret void
}

; Scalar i16 variant of the three memory shapes; the expected costs follow the
; same pattern as the other scalar cases (first load 0, stores 0, second load 1).
define void @bswap_i16_mem(ptr %src, i16 %arg, ptr %dst) {
; CHECK-LABEL: function 'bswap_i16_mem'
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load i16, ptr %src
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call i16 @llvm.bswap.i16(i16 %Ld1)
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call i16 @llvm.bswap.i16(i16 %arg)
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i16 %swp2, ptr %dst
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load i16, ptr %src
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call i16 @llvm.bswap.i16(i16 %Ld2)
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: store i16 %swp3, ptr %dst
  %Ld1 = load i16, ptr %src
  %swp1 = tail call i16 @llvm.bswap.i16(i16 %Ld1)

  %swp2 = tail call i16 @llvm.bswap.i16(i16 %arg)
  store i16 %swp2, ptr %dst

  %Ld2 = load i16, ptr %src
  %swp3 = tail call i16 @llvm.bswap.i16(i16 %Ld2)
  store i16 %swp3, ptr %dst

  ret void
}

; <8 x i16> variant of the three memory shapes, with the same per-CPU split as
; the other vector cases (first load and stores: 1 with z13, 0 with z15).
define void @bswap_v8i16_mem(ptr %src, <8 x i16> %arg, ptr %dst) {
; CHECK-LABEL: function 'bswap_v8i16_mem'
; Z13: Cost Model: Found an estimated cost of 1 for instruction: %Ld1 = load <8 x i16>, ptr %src
; Z15: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load <8 x i16>, ptr %src
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %Ld1)
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %arg)
; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %swp2, ptr %dst
; Z15: Cost Model: Found an estimated cost of 0 for instruction: store <8 x i16> %swp2, ptr %dst
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load <8 x i16>, ptr %src
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %Ld2)
; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %swp3, ptr %dst
; Z15: Cost Model: Found an estimated cost of 0 for instruction: store <8 x i16> %swp3, ptr %dst
  %Ld1 = load <8 x i16>, ptr %src
  %swp1 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %Ld1)

  %swp2 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %arg)
  store <8 x i16> %swp2, ptr %dst

  %Ld2 = load <8 x i16>, ptr %src
  %swp3 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %Ld2)
  store <8 x i16> %swp3, ptr %dst

  ret void
}

; Declarations of every llvm.bswap overload used in this file.
declare i128 @llvm.bswap.i128(i128)

declare i64 @llvm.bswap.i64(i64)
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>)

declare i32 @llvm.bswap.i32(i32)
declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>)
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>)

declare i16 @llvm.bswap.i16(i16)
declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>)
declare <4 x i16> @llvm.bswap.v4i16(<4 x i16>)
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>)