; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s
; RUN: opt < %s -mtriple=riscv32 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
; RUN: opt < %s -mtriple=riscv64 -mattr=+v -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE

; Cost-model expectations for the llvm.vector.reduce.add intrinsic on RISC-V
; with the +v attribute enabled.
;
; The file is run four times: riscv32 and riscv64, each with the throughput
; cost kind and with the code-size cost kind. Both throughput runs are matched
; against the default CHECK prefix and both code-size runs against the SIZE
; prefix, so each prefix's expectations must hold for both triples.
;
; Every function below calls the reduction on fixed-length vectors of 1, 2, 4,
; 8, 16, 32, 64 and 128 elements of a single element type. All operands are
; undef, the call results are never used, and the %arg parameter is unused:
; only the per-instruction cost printed by the cost-model printer is checked.
;
; The expectation lines are autogenerated (see the first line of this file);
; regenerate them with that script instead of editing the numbers by hand.

; Add reductions over vectors of i1.
define i32 @reduce_i1(i32 %arg) {
; CHECK-LABEL: 'reduce_i1'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.add.v2i1(<2 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.add.v8i1(<8 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.add.v16i1(<16 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i1'
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i1 @llvm.vector.reduce.add.v2i1(<2 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4 = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8 = call i1 @llvm.vector.reduce.add.v8i1(<8 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = call i1 @llvm.vector.reduce.add.v16i1(<16 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
  %V1 = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> undef)
  %V2 = call i1 @llvm.vector.reduce.add.v2i1(<2 x i1> undef)
  %V4 = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> undef)
  %V8 = call i1 @llvm.vector.reduce.add.v8i1(<8 x i1> undef)
  %V16 = call i1 @llvm.vector.reduce.add.v16i1(<16 x i1> undef)
  %V32 = call i1 @llvm.vector.reduce.add.v32i1(<32 x i1> undef)
  %V64 = call i1 @llvm.vector.reduce.add.v64i1(<64 x i1> undef)
  %V128 = call i1 @llvm.vector.reduce.add.v128i1(<128 x i1> undef)
  ret i32 undef
}

; Add reductions over vectors of i8.
define i32 @reduce_i8(i32 %arg) {
; CHECK-LABEL: 'reduce_i8'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i8'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
  %V1 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef)
  %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
  %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
  %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
  %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
  %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
  %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
  %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
  ret i32 undef
}

; Add reductions over vectors of i16.
define i32 @reduce_i16(i32 %arg) {
; CHECK-LABEL: 'reduce_i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i16'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
  %V1 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef)
  %V2 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef)
  %V4 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef)
  %V8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef)
  %V16 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef)
  %V32 = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> undef)
  %V64 = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> undef)
  %V128 = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> undef)
  ret i32 undef
}

; Add reductions over vectors of i32.
define i32 @reduce_i32(i32 %arg) {
; CHECK-LABEL: 'reduce_i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
  %V1 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef)
  %V2 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef)
  %V4 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef)
  %V8 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef)
  %V16 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef)
  %V32 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> undef)
  %V64 = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> undef)
  %V128 = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> undef)
  ret i32 undef
}

; Add reductions over vectors of i64.
define i32 @reduce_i64(i32 %arg) {
; CHECK-LABEL: 'reduce_i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SIZE-LABEL: 'reduce_i64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
  %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
  %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
  %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
  %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
  %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
  %V32 = call i64 @llvm.vector.reduce.add.v32i64(<32 x i64> undef)
  %V64 = call i64 @llvm.vector.reduce.add.v64i64(<64 x i64> undef)
  %V128 = call i64 @llvm.vector.reduce.add.v128i64(<128 x i64> undef)
  ret i32 undef
}

; Declarations of the reduction intrinsics called above: one per combination of
; element type (i1, i8, i16, i32, i64) and element count (1 through 128).
declare i1 @llvm.vector.reduce.add.v1i1(<1 x i1>)
declare i1 @llvm.vector.reduce.add.v2i1(<2 x i1>)
declare i1 @llvm.vector.reduce.add.v4i1(<4 x i1>)
declare i1 @llvm.vector.reduce.add.v8i1(<8 x i1>)
declare i1 @llvm.vector.reduce.add.v16i1(<16 x i1>)
declare i1 @llvm.vector.reduce.add.v32i1(<32 x i1>)
declare i1 @llvm.vector.reduce.add.v64i1(<64 x i1>)
declare i1 @llvm.vector.reduce.add.v128i1(<128 x i1>)
declare i8 @llvm.vector.reduce.add.v1i8(<1 x i8>)
declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>)
declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>)
declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.add.v32i8(<32 x i8>)
declare i8 @llvm.vector.reduce.add.v64i8(<64 x i8>)
declare i8 @llvm.vector.reduce.add.v128i8(<128 x i8>)
declare i16 @llvm.vector.reduce.add.v1i16(<1 x i16>)
declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>)
declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
declare i16 @llvm.vector.reduce.add.v32i16(<32 x i16>)
declare i16 @llvm.vector.reduce.add.v64i16(<64 x i16>)
declare i16 @llvm.vector.reduce.add.v128i16(<128 x i16>)
declare i32 @llvm.vector.reduce.add.v1i32(<1 x i32>)
declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
declare i32 @llvm.vector.reduce.add.v32i32(<32 x i32>)
declare i32 @llvm.vector.reduce.add.v64i32(<64 x i32>)
declare i32 @llvm.vector.reduce.add.v128i32(<128 x i32>)
declare i64 @llvm.vector.reduce.add.v1i64(<1 x i64>)
declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
declare i64 @llvm.vector.reduce.add.v32i64(<32 x i64>)
declare i64 @llvm.vector.reduce.add.v64i64(<64 x i64>)
declare i64 @llvm.vector.reduce.add.v128i64(<128 x i64>)