; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefix=SVE
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s --check-prefix=SVE2

; Cost-model expectations for sign/zero extends that feed the sequence
;   trunc(lshr(add(add(ext(a), 1), ext(b)), 1))
; on AArch64, checked once with -mattr=+sve and once with -mattr=+sve2.
; Only the estimated cost of the two extend instructions is checked in each
; function; the remaining instructions exist to form (or deliberately break)
; the surrounding pattern.
;
; NOTE(review): the section names suggest the sequence is expected to be
; recognised as a rounding halving add (SRHADD/URHADD), which would explain
; the zero-cost extends below - confirm against the AArch64 cost model.
; NOTE(review): every function takes %dst but stores its result through %a;
; %dst is unused throughout.

; SRHADD

; Fixed-width <16 x i8> -> <16 x i16> sign extends. Both configurations
; expect the extends to cost 0.
define void @srhadd_i8_sext_i16_fixed(ptr %a, ptr %b, ptr %dst) {
; SVE-LABEL: 'srhadd_i8_sext_i16_fixed'
; SVE: Cost Model: Found an estimated cost of 0 for instruction: %ext1 = sext <16 x i8> %ld1 to <16 x i16>
; SVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ext2 = sext <16 x i8> %ld2 to <16 x i16>
;
; SVE2-LABEL: 'srhadd_i8_sext_i16_fixed'
; SVE2: Cost Model: Found an estimated cost of 0 for instruction: %ext1 = sext <16 x i8> %ld1 to <16 x i16>
; SVE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ext2 = sext <16 x i8> %ld2 to <16 x i16>
;
  %ld1 = load <16 x i8>, ptr %a
  %ld2 = load <16 x i8>, ptr %b
  %ext1 = sext <16 x i8> %ld1 to <16 x i16>
  %ext2 = sext <16 x i8> %ld2 to <16 x i16>
  %add1 = add nuw nsw <16 x i16> %ext1, shufflevector (<16 x i16> insertelement (<16 x i16> poison, i16 1, i64 0), <16 x i16> poison, <16 x i32> zeroinitializer)
  %add2 = add nuw nsw <16 x i16> %add1, %ext2
  %shr = lshr <16 x i16> %add2, shufflevector (<16 x i16> insertelement (<16 x i16> poison, i16 1, i64 0), <16 x i16> poison, <16 x i32> zeroinitializer)
  %trunc = trunc <16 x i16> %shr to <16 x i8>
  store <16 x i8> %trunc, ptr %a
  ret void
}

; Scalable <vscale x 16 x i8> -> <vscale x 16 x i16> sign extends.
; Expected extend cost: 2 with +sve, 0 with +sve2.
define void @srhadd_i8_sext_i16_scalable(ptr %a, ptr %b, ptr %dst) {
; SVE-LABEL: 'srhadd_i8_sext_i16_scalable'
; SVE: Cost Model: Found an estimated cost of 2 for instruction: %ext1 = sext <vscale x 16 x i8> %ld1 to <vscale x 16 x i16>
; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ext2 = sext <vscale x 16 x i8> %ld2 to <vscale x 16 x i16>
;
; SVE2-LABEL: 'srhadd_i8_sext_i16_scalable'
; SVE2: Cost Model: Found an estimated cost of 0 for instruction: %ext1 = sext <vscale x 16 x i8> %ld1 to <vscale x 16 x i16>
; SVE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ext2 = sext <vscale x 16 x i8> %ld2 to <vscale x 16 x i16>
;
  %ld1 = load <vscale x 16 x i8>, ptr %a
  %ld2 = load <vscale x 16 x i8>, ptr %b
  %ext1 = sext <vscale x 16 x i8> %ld1 to <vscale x 16 x i16>
  %ext2 = sext <vscale x 16 x i8> %ld2 to <vscale x 16 x i16>
  %add1 = add nuw nsw <vscale x 16 x i16> %ext1, splat (i16 1)
  %add2 = add nuw nsw <vscale x 16 x i16> %add1, %ext2
  %shr = lshr <vscale x 16 x i16> %add2, splat (i16 1)
  %trunc = trunc <vscale x 16 x i16> %shr to <vscale x 16 x i8>
  store <vscale x 16 x i8> %trunc, ptr %a
  ret void
}

; Scalable <vscale x 8 x i16> -> <vscale x 8 x i64> sign extends (the element
; width grows by 4x). Expected extend cost: 6 with +sve, 0 with +sve2.
define void @srhadd_i16_sext_i64_scalable(ptr %a, ptr %b, ptr %dst) {
; SVE-LABEL: 'srhadd_i16_sext_i64_scalable'
; SVE: Cost Model: Found an estimated cost of 6 for instruction: %ext1 = sext <vscale x 8 x i16> %ld1 to <vscale x 8 x i64>
; SVE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %ext2 = sext <vscale x 8 x i16> %ld2 to <vscale x 8 x i64>
;
; SVE2-LABEL: 'srhadd_i16_sext_i64_scalable'
; SVE2: Cost Model: Found an estimated cost of 0 for instruction: %ext1 = sext <vscale x 8 x i16> %ld1 to <vscale x 8 x i64>
; SVE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ext2 = sext <vscale x 8 x i16> %ld2 to <vscale x 8 x i64>
;
  %ld1 = load <vscale x 8 x i16>, ptr %a
  %ld2 = load <vscale x 8 x i16>, ptr %b
  %ext1 = sext <vscale x 8 x i16> %ld1 to <vscale x 8 x i64>
  %ext2 = sext <vscale x 8 x i16> %ld2 to <vscale x 8 x i64>
  %add1 = add nuw nsw <vscale x 8 x i64> %ext1, splat (i64 1)
  %add2 = add nuw nsw <vscale x 8 x i64> %add1, %ext2
  %shr = lshr <vscale x 8 x i64> %add2, splat (i64 1)
  %trunc = trunc <vscale x 8 x i64> %shr to <vscale x 8 x i16>
  store <vscale x 8 x i16> %trunc, ptr %a
  ret void
}

; URHADD

; Fixed-width <4 x i32> -> <4 x i64> zero extends. Both configurations
; expect the extends to cost 0.
define void @urhadd_i32_zext_i64_fixed(ptr %a, ptr %b, ptr %dst) {
; SVE-LABEL: 'urhadd_i32_zext_i64_fixed'
; SVE: Cost Model: Found an estimated cost of 0 for instruction: %ext1 = zext <4 x i32> %ld1 to <4 x i64>
; SVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ext2 = zext <4 x i32> %ld2 to <4 x i64>
;
; SVE2-LABEL: 'urhadd_i32_zext_i64_fixed'
; SVE2: Cost Model: Found an estimated cost of 0 for instruction: %ext1 = zext <4 x i32> %ld1 to <4 x i64>
; SVE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ext2 = zext <4 x i32> %ld2 to <4 x i64>
;
  %ld1 = load <4 x i32>, ptr %a
  %ld2 = load <4 x i32>, ptr %b
  %ext1 = zext <4 x i32> %ld1 to <4 x i64>
  %ext2 = zext <4 x i32> %ld2 to <4 x i64>
  %add1 = add nuw nsw <4 x i64> %ext1, shufflevector (<4 x i64> insertelement (<4 x i64> poison, i64 1, i64 0), <4 x i64> poison, <4 x i32> zeroinitializer)
  %add2 = add nuw nsw <4 x i64> %add1, %ext2
  %shr = lshr <4 x i64> %add2, shufflevector (<4 x i64> insertelement (<4 x i64> poison, i64 1, i64 0), <4 x i64> poison, <4 x i32> zeroinitializer)
  %trunc = trunc <4 x i64> %shr to <4 x i32>
  store <4 x i32> %trunc, ptr %a
  ret void
}

; Scalable <vscale x 16 x i8> -> <vscale x 16 x i64> zero extends (the element
; width grows by 8x). Expected extend cost: 14 with +sve, 0 with +sve2.
define void @urhadd_i8_zext_i64(ptr %a, ptr %b, ptr %dst) {
; SVE-LABEL: 'urhadd_i8_zext_i64'
; SVE: Cost Model: Found an estimated cost of 14 for instruction: %ext1 = zext <vscale x 16 x i8> %ld1 to <vscale x 16 x i64>
; SVE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %ext2 = zext <vscale x 16 x i8> %ld2 to <vscale x 16 x i64>
;
; SVE2-LABEL: 'urhadd_i8_zext_i64'
; SVE2: Cost Model: Found an estimated cost of 0 for instruction: %ext1 = zext <vscale x 16 x i8> %ld1 to <vscale x 16 x i64>
; SVE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ext2 = zext <vscale x 16 x i8> %ld2 to <vscale x 16 x i64>
;
  %ld1 = load <vscale x 16 x i8>, ptr %a
  %ld2 = load <vscale x 16 x i8>, ptr %b
  %ext1 = zext <vscale x 16 x i8> %ld1 to <vscale x 16 x i64>
  %ext2 = zext <vscale x 16 x i8> %ld2 to <vscale x 16 x i64>
  %add1 = add nuw nsw <vscale x 16 x i64> %ext1, splat (i64 1)
  %add2 = add nuw nsw <vscale x 16 x i64> %add1, %ext2
  %shr = lshr <vscale x 16 x i64> %add2, splat (i64 1)
  %trunc = trunc <vscale x 16 x i64> %shr to <vscale x 16 x i8>
  store <vscale x 16 x i8> %trunc, ptr %a
  ret void
}

; Scalable <vscale x 8 x i16> -> <vscale x 8 x i32> zero extends.
; Expected extend cost: 2 with +sve, 0 with +sve2.
define void @urhadd_i16_zext_i32(ptr %a, ptr %b, ptr %dst) {
; SVE-LABEL: 'urhadd_i16_zext_i32'
; SVE: Cost Model: Found an estimated cost of 2 for instruction: %ext1 = zext <vscale x 8 x i16> %ld1 to <vscale x 8 x i32>
; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ext2 = zext <vscale x 8 x i16> %ld2 to <vscale x 8 x i32>
;
; SVE2-LABEL: 'urhadd_i16_zext_i32'
; SVE2: Cost Model: Found an estimated cost of 0 for instruction: %ext1 = zext <vscale x 8 x i16> %ld1 to <vscale x 8 x i32>
; SVE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %ext2 = zext <vscale x 8 x i16> %ld2 to <vscale x 8 x i32>
;
  %ld1 = load <vscale x 8 x i16>, ptr %a
  %ld2 = load <vscale x 8 x i16>, ptr %b
  %ext1 = zext <vscale x 8 x i16> %ld1 to <vscale x 8 x i32>
  %ext2 = zext <vscale x 8 x i16> %ld2 to <vscale x 8 x i32>
  %add1 = add nuw nsw <vscale x 8 x i32> %ext1, splat (i32 1)
  %add2 = add nuw nsw <vscale x 8 x i32> %add1, %ext2
  %shr = lshr <vscale x 8 x i32> %add2, splat (i32 1)
  %trunc = trunc <vscale x 8 x i32> %shr to <vscale x 8 x i16>
  store <vscale x 8 x i16> %trunc, ptr %a
  ret void
}

; NEGATIVE TESTS
;
; In the functions below both configurations expect the same non-zero extend
; cost (2), i.e. +sve2 gives no discount.

; One operand is sign-extended and the other is zero-extended.
define void @ext_operand_mismatch(ptr %a, ptr %b, ptr %dst) {
; SVE-LABEL: 'ext_operand_mismatch'
; SVE: Cost Model: Found an estimated cost of 2 for instruction: %ext1 = sext <vscale x 16 x i8> %ld1 to <vscale x 16 x i16>
; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ext2 = zext <vscale x 16 x i8> %ld2 to <vscale x 16 x i16>
;
; SVE2-LABEL: 'ext_operand_mismatch'
; SVE2: Cost Model: Found an estimated cost of 2 for instruction: %ext1 = sext <vscale x 16 x i8> %ld1 to <vscale x 16 x i16>
; SVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ext2 = zext <vscale x 16 x i8> %ld2 to <vscale x 16 x i16>
;
  %ld1 = load <vscale x 16 x i8>, ptr %a
  %ld2 = load <vscale x 16 x i8>, ptr %b
  %ext1 = sext <vscale x 16 x i8> %ld1 to <vscale x 16 x i16>
  %ext2 = zext <vscale x 16 x i8> %ld2 to <vscale x 16 x i16>
  %add1 = add nuw nsw <vscale x 16 x i16> %ext1, splat (i16 1)
  %add2 = add nuw nsw <vscale x 16 x i16> %add1, %ext2
  %shr = lshr <vscale x 16 x i16> %add2, splat (i16 1)
  %trunc = trunc <vscale x 16 x i16> %shr to <vscale x 16 x i8>
  store <vscale x 16 x i8> %trunc, ptr %a
  ret void
}

; The intermediate adds have extra users: %add.res consumes both %add1 and
; %add2 in addition to their uses inside the add/shift chain. %trunc is
; computed but never used; the stored value is %res.
define void @add_multiple_uses(ptr %a, ptr %b, ptr %dst) {
; SVE-LABEL: 'add_multiple_uses'
; SVE: Cost Model: Found an estimated cost of 2 for instruction: %ext1 = sext <vscale x 8 x i16> %ld1 to <vscale x 8 x i32>
; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ext2 = sext <vscale x 8 x i16> %ld2 to <vscale x 8 x i32>
;
; SVE2-LABEL: 'add_multiple_uses'
; SVE2: Cost Model: Found an estimated cost of 2 for instruction: %ext1 = sext <vscale x 8 x i16> %ld1 to <vscale x 8 x i32>
; SVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ext2 = sext <vscale x 8 x i16> %ld2 to <vscale x 8 x i32>
;
  %ld1 = load <vscale x 8 x i16>, ptr %a
  %ld2 = load <vscale x 8 x i16>, ptr %b
  %ext1 = sext <vscale x 8 x i16> %ld1 to <vscale x 8 x i32>
  %ext2 = sext <vscale x 8 x i16> %ld2 to <vscale x 8 x i32>
  %add1 = add nuw nsw <vscale x 8 x i32> %ext1, splat (i32 1)
  %add2 = add nuw nsw <vscale x 8 x i32> %add1, %ext2
  %shr = lshr <vscale x 8 x i32> %add2, splat (i32 1)
  %trunc = trunc <vscale x 8 x i32> %shr to <vscale x 8 x i16>
  %add.res = add nuw nsw <vscale x 8 x i32> %add1, %add2
  %res = trunc <vscale x 8 x i32> %add.res to <vscale x 8 x i16>
  store <vscale x 8 x i16> %res, ptr %a
  ret void
}

; The shift result has an extra user: %shr feeds both %trunc and %add3
; (%add3 also reuses %add2). %trunc itself is never used; the stored value
; is %res.
define void @shift_multiple_uses(ptr %a, ptr %b, ptr %dst) {
; SVE-LABEL: 'shift_multiple_uses'
; SVE: Cost Model: Found an estimated cost of 2 for instruction: %ext1 = zext <vscale x 16 x i8> %ld1 to <vscale x 16 x i16>
; SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ext2 = zext <vscale x 16 x i8> %ld2 to <vscale x 16 x i16>
;
; SVE2-LABEL: 'shift_multiple_uses'
; SVE2: Cost Model: Found an estimated cost of 2 for instruction: %ext1 = zext <vscale x 16 x i8> %ld1 to <vscale x 16 x i16>
; SVE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %ext2 = zext <vscale x 16 x i8> %ld2 to <vscale x 16 x i16>
;
  %ld1 = load <vscale x 16 x i8>, ptr %a
  %ld2 = load <vscale x 16 x i8>, ptr %b
  %ext1 = zext <vscale x 16 x i8> %ld1 to <vscale x 16 x i16>
  %ext2 = zext <vscale x 16 x i8> %ld2 to <vscale x 16 x i16>
  %add1 = add nuw nsw <vscale x 16 x i16> %ext1, splat (i16 1)
  %add2 = add nuw nsw <vscale x 16 x i16> %add1, %ext2
  %shr = lshr <vscale x 16 x i16> %add2, splat (i16 1)
  %trunc = trunc <vscale x 16 x i16> %shr to <vscale x 16 x i8>
  %add3 = add nuw nsw <vscale x 16 x i16> %shr, %add2
  %res = trunc <vscale x 16 x i16> %add3 to <vscale x 16 x i8>
  store <vscale x 16 x i8> %res, ptr %a
  ret void
}