; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3
; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE4,SSE41
; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE4,SSE42
; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+avx | FileCheck %s --check-prefixes=AVX1
; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: opt < %s -passes="print<cost-model>" -mtriple=x86_64-apple-darwin 2>&1 -disable-output -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ
;
; Purpose of this file: print the cost-model estimate for every
; llvm.vector.reduce.umin call below on an x86_64 triple, once per -mattr
; setting in the run lines above (sse2, ssse3, sse4.1, sse4.2, avx, avx2,
; avx512f, avx512f+bw, avx512f+dq), and match the printed costs with FileCheck.
;
; Every test function passes undef vectors to the intrinsic, ignores the
; results and returns undef; the %arg parameter is unused. Only the text
; printed by the cost-model pass is checked.
;
; The check lines are autogenerated (see the first line of this file), so
; they should be regenerated with the script rather than edited by hand.

; i64 elements: vectors of 1, 2, 4, 8 and 16 elements.
; All three avx512 run lines are matched through their shared prefix here.
define i32 @reduce_i64(i32 %arg) {
; SSE2-LABEL: 'reduce_i64'
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i64'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE4-LABEL: 'reduce_i64'
; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
; SSE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
; SSE4-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
; SSE4-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i64'
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i64'
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i64'
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
  %V1 = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> undef)
  %V2 = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> undef)
  %V4 = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> undef)
  %V8 = call i64 @llvm.vector.reduce.umin.v8i64(<8 x i64> undef)
  %V16 = call i64 @llvm.vector.reduce.umin.v16i64(<16 x i64> undef)
  ret i32 undef
}

; i32 elements: vectors of 2, 4, 8, 16 and 32 elements.
; All three avx512 run lines are matched through their shared prefix here.
define i32 @reduce_i32(i32 %arg) {
; SSE2-LABEL: 'reduce_i32'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i32'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE4-LABEL: 'reduce_i32'
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
; SSE4-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
; SSE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i32'
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i32'
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
  %V2 = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> undef)
  %V4 = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> undef)
  %V8 = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> undef)
  %V16 = call i32 @llvm.vector.reduce.umin.v16i32(<16 x i32> undef)
  %V32 = call i32 @llvm.vector.reduce.umin.v32i32(<32 x i32> undef)
  ret i32 undef
}

; i16 elements: vectors of 2, 4, 8, 16, 32 and 64 elements.
; The avx512f, avx512f+bw and avx512f+dq run lines are checked separately
; here: the expected costs for the 32- and 64-element cases differ between
; the bw configuration and the other two.
define i32 @reduce_i16(i32 %arg) {
; SSE2-LABEL: 'reduce_i16'
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i16'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE4-LABEL: 'reduce_i16'
; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
; SSE4-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
; SSE4-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i16'
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i16'
; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i16'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i16'
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i16'
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
  %V2 = call i16 @llvm.vector.reduce.umin.v2i16(<2 x i16> undef)
  %V4 = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> undef)
  %V8 = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> undef)
  %V16 = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> undef)
  %V32 = call i16 @llvm.vector.reduce.umin.v32i16(<32 x i16> undef)
  %V64 = call i16 @llvm.vector.reduce.umin.v64i16(<64 x i16> undef)
  ret i32 undef
}

; i8 elements: vectors of 2, 4, 8, 16, 32, 64 and 128 elements.
; The avx512f, avx512f+bw and avx512f+dq run lines are checked separately
; here: the expected costs for the 64- and 128-element cases differ between
; the bw configuration and the other two.
define i32 @reduce_i8(i32 %arg) {
; SSE2-LABEL: 'reduce_i8'
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i8'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE4-LABEL: 'reduce_i8'
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
; SSE4-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
; SSE4-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
; SSE4-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i8'
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i8'
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i8'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i8'
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i8'
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
  %V2 = call i8 @llvm.vector.reduce.umin.v2i8(<2 x i8> undef)
  %V4 = call i8 @llvm.vector.reduce.umin.v4i8(<4 x i8> undef)
  %V8 = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> undef)
  %V16 = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> undef)
  %V32 = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> undef)
  %V64 = call i8 @llvm.vector.reduce.umin.v64i8(<64 x i8> undef)
  %V128 = call i8 @llvm.vector.reduce.umin.v128i8(<128 x i8> undef)
  ret i32 undef
}

; Declarations of every llvm.vector.reduce.umin overload called above,
; grouped by element type (i64, i32, i16, i8).
declare i64 @llvm.vector.reduce.umin.v1i64(<1 x i64>)
declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.umin.v8i64(<8 x i64>)
declare i64 @llvm.vector.reduce.umin.v16i64(<16 x i64>)

declare i32 @llvm.vector.reduce.umin.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>)
declare i32 @llvm.vector.reduce.umin.v16i32(<16 x i32>)
declare i32 @llvm.vector.reduce.umin.v32i32(<32 x i32>)

declare i16 @llvm.vector.reduce.umin.v2i16(<2 x i16>)
declare i16 @llvm.vector.reduce.umin.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.umin.v16i16(<16 x i16>)
declare i16 @llvm.vector.reduce.umin.v32i16(<32 x i16>)
declare i16 @llvm.vector.reduce.umin.v64i16(<64 x i16>)

declare i8 @llvm.vector.reduce.umin.v2i8(<2 x i8>)
declare i8 @llvm.vector.reduce.umin.v4i8(<4 x i8>)
declare i8 @llvm.vector.reduce.umin.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.umin.v32i8(<32 x i8>)
declare i8 @llvm.vector.reduce.umin.v64i8(<64 x i8>)
declare i8 @llvm.vector.reduce.umin.v128i8(<128 x i8>)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; SSE41: {{.*}}
; SSE42: {{.*}}