xref: /llvm-project/llvm/test/Analysis/CostModel/X86/arith-overflow.ll (revision 7979f24954ed6c1bfac8a9961fee69a44822f2b6)
1; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSSE3
3; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE42
4; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1
5; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2
6; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512F
7; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512BW
8; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512DQ
9;
10; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SLM
11; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,GLM
12; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,BTVER2
13
14; CHECK: {{^}}
15
16;
17; sadd.with.overflow
18;
19
20declare {i64, i1}              @llvm.sadd.with.overflow.i64(i64, i64)
21declare {<2 x i64>, <2 x i1>}  @llvm.sadd.with.overflow.v2i64(<2 x i64>, <2 x i64>)
22declare {<4 x i64>, <4 x i1>}  @llvm.sadd.with.overflow.v4i64(<4 x i64>, <4 x i64>)
23declare {<8 x i64>, <8 x i1>}  @llvm.sadd.with.overflow.v8i64(<8 x i64>, <8 x i64>)
24
25declare {i32, i1}               @llvm.sadd.with.overflow.i32(i32, i32)
26declare {<4 x i32>, <4 x i1>}   @llvm.sadd.with.overflow.v4i32(<4 x i32>, <4 x i32>)
27declare {<8 x i32>, <8 x i1>}   @llvm.sadd.with.overflow.v8i32(<8 x i32>, <8 x i32>)
28declare {<16 x i32>, <16 x i1>} @llvm.sadd.with.overflow.v16i32(<16 x i32>, <16 x i32>)
29
30declare {i16, i1}               @llvm.sadd.with.overflow.i16(i16, i16)
31declare {<8 x i16>,  <8 x i1>}  @llvm.sadd.with.overflow.v8i16(<8 x i16>, <8 x i16>)
32declare {<16 x i16>, <16 x i1>} @llvm.sadd.with.overflow.v16i16(<16 x i16>, <16 x i16>)
33declare {<32 x i16>, <32 x i1>} @llvm.sadd.with.overflow.v32i16(<32 x i16>, <32 x i16>)
34
35declare {i8, i1}                @llvm.sadd.with.overflow.i8(i8, i8)
36declare {<16 x i8>, <16 x i1>}  @llvm.sadd.with.overflow.v16i8(<16 x i8>, <16 x i8>)
37declare {<32 x i8>, <32 x i1>}  @llvm.sadd.with.overflow.v32i8(<32 x i8>, <32 x i8>)
38declare {<64 x i8>, <64 x i1>}  @llvm.sadd.with.overflow.v64i8(<64 x i8>, <64 x i8>)
39
40define i32 @sadd(i32 %arg) {
41; SSSE3-LABEL: 'sadd'
42; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
43; SSSE3-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
44; SSSE3-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
45; SSSE3-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
46; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
47; SSSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
48; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
49; SSSE3-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
50; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
51; SSSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
52; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
53; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
54; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
55; SSSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
56; SSSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
57; SSSE3-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
58; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
59;
60; SSE42-LABEL: 'sadd'
61; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
62; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
63; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
64; SSE42-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
65; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
66; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
67; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
68; SSE42-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
69; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
70; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
71; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
72; SSE42-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
73; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
74; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
75; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
76; SSE42-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
77; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
78;
79; AVX1-LABEL: 'sadd'
80; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
81; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
82; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
83; AVX1-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
84; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
85; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
86; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
87; AVX1-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
88; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
89; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
90; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
91; AVX1-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
92; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
93; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
94; AVX1-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
95; AVX1-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
96; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
97;
98; AVX2-LABEL: 'sadd'
99; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
100; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
101; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
102; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
103; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
104; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
105; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
106; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
107; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
108; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
109; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
110; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
111; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
112; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
113; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
114; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
115; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
116;
117; AVX512F-LABEL: 'sadd'
118; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
119; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
120; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
121; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
122; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
123; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
124; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
125; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
126; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
127; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
128; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
129; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
130; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
131; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
132; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
133; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
134; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
135;
136; AVX512BW-LABEL: 'sadd'
137; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
138; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
139; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
140; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
141; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
142; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
143; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
144; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
145; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
146; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
147; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
148; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
149; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
150; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
151; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
152; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
153; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
154;
155; AVX512DQ-LABEL: 'sadd'
156; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
157; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
158; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
159; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
160; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
161; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
162; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
163; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
164; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
165; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
166; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
167; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
168; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
169; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
170; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
171; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
172; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
173;
174; SLM-LABEL: 'sadd'
175; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
176; SLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
177; SLM-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
178; SLM-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
179; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
180; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
181; SLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
182; SLM-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
183; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
184; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
185; SLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
186; SLM-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
187; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
188; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
189; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
190; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
191; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
192;
193; GLM-LABEL: 'sadd'
194; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
195; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
196; GLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
197; GLM-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
198; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
199; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
200; GLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
201; GLM-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
202; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
203; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
204; GLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
205; GLM-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
206; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
207; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
208; GLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
209; GLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
210; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
211;
212; BTVER2-LABEL: 'sadd'
213; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
214; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
215; BTVER2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
216; BTVER2-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
217; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
218; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
219; BTVER2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
220; BTVER2-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
221; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
222; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
223; BTVER2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
224; BTVER2-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
225; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
226; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
227; BTVER2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
228; BTVER2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
229; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
230;
231  %I64 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
232  %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
233  %V4I64 = call {<4 x i64>, <4 x i1>} @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
234  %V8I64 = call {<8 x i64>, <8 x i1>} @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
235
236  %I32 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
237  %V4I32  = call {<4 x i32>, <4 x i1>}  @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
238  %V8I32  = call {<8 x i32>, <8 x i1>}  @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
239  %V16I32 = call {<16 x i32>, <16 x i1>} @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
240
241  %I16 = call {i16, i1} @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
242  %V8I16  = call {<8 x i16>, <8 x i1>}  @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
243  %V16I16 = call {<16 x i16>, <16 x i1>} @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
244  %V32I16 = call {<32 x i16>, <32 x i1>} @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
245
246  %I8 = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
247  %V16I8 = call {<16 x i8>, <16 x i1>} @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
248  %V32I8 = call {<32 x i8>, <32 x i1>} @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
249  %V64I8 = call {<64 x i8>, <64 x i1>} @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
250
251  ret i32 undef
252}
253
254;
255; uadd.with.overflow
256;
257
258declare {i64, i1}              @llvm.uadd.with.overflow.i64(i64, i64)
259declare {<2 x i64>, <2 x i1>}  @llvm.uadd.with.overflow.v2i64(<2 x i64>, <2 x i64>)
260declare {<4 x i64>, <4 x i1>}  @llvm.uadd.with.overflow.v4i64(<4 x i64>, <4 x i64>)
261declare {<8 x i64>, <8 x i1>}  @llvm.uadd.with.overflow.v8i64(<8 x i64>, <8 x i64>)
262
263declare {i32, i1}               @llvm.uadd.with.overflow.i32(i32, i32)
264declare {<4 x i32>, <4 x i1>}   @llvm.uadd.with.overflow.v4i32(<4 x i32>, <4 x i32>)
265declare {<8 x i32>, <8 x i1>}   @llvm.uadd.with.overflow.v8i32(<8 x i32>, <8 x i32>)
266declare {<16 x i32>, <16 x i1>} @llvm.uadd.with.overflow.v16i32(<16 x i32>, <16 x i32>)
267
268declare {i16, i1}               @llvm.uadd.with.overflow.i16(i16, i16)
269declare {<8 x i16>,  <8 x i1>}  @llvm.uadd.with.overflow.v8i16(<8 x i16>, <8 x i16>)
270declare {<16 x i16>, <16 x i1>} @llvm.uadd.with.overflow.v16i16(<16 x i16>, <16 x i16>)
271declare {<32 x i16>, <32 x i1>} @llvm.uadd.with.overflow.v32i16(<32 x i16>, <32 x i16>)
272
273declare {i8, i1}                @llvm.uadd.with.overflow.i8(i8, i8)
274declare {<16 x i8>, <16 x i1>}  @llvm.uadd.with.overflow.v16i8(<16 x i8>, <16 x i8>)
275declare {<32 x i8>, <32 x i1>}  @llvm.uadd.with.overflow.v32i8(<32 x i8>, <32 x i8>)
276declare {<64 x i8>, <64 x i1>}  @llvm.uadd.with.overflow.v64i8(<64 x i8>, <64 x i8>)
277
278define i32 @uadd(i32 %arg) {
279; SSSE3-LABEL: 'uadd'
280; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
281; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
282; SSSE3-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
283; SSSE3-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
284; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
285; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
286; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
287; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
288; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
289; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
290; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
291; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
292; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
293; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
294; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
295; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
296; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
297;
298; SSE42-LABEL: 'uadd'
299; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
300; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
301; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
302; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
303; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
304; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
305; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
306; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
307; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
308; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
309; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
310; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
311; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
312; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
313; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
314; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
315; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
316;
317; AVX1-LABEL: 'uadd'
318; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
319; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
320; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
321; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
322; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
323; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
324; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
325; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
326; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
327; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
328; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
329; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
330; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
331; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
332; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
333; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
334; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
335;
336; AVX2-LABEL: 'uadd'
337; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
338; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
339; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
340; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
341; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
342; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
343; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
344; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
345; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
346; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
347; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
348; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
349; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
350; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
351; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
352; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
353; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
354;
355; AVX512F-LABEL: 'uadd'
356; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
357; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
358; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
359; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
360; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
361; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
362; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
363; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
364; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
365; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
366; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
367; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
368; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
369; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
370; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
371; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
372; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
373;
374; AVX512BW-LABEL: 'uadd'
375; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
376; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
377; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
378; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
379; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
380; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
381; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
382; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
383; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
384; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
385; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
386; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
387; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
388; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
389; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
390; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
391; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
392;
393; AVX512DQ-LABEL: 'uadd'
394; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
395; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
396; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
397; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
398; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
399; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
400; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
401; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
402; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
403; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
404; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
405; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
406; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
407; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
408; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
409; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
410; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
411;
412; SLM-LABEL: 'uadd'
413; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
414; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
415; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
416; SLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
417; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
418; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
419; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
420; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
421; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
422; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
423; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
424; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
425; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
426; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
427; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
428; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
429; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
430;
431; GLM-LABEL: 'uadd'
432; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
433; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
434; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
435; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
436; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
437; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
438; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
439; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
440; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
441; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
442; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
443; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
444; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
445; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
446; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
447; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
448; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
449;
450; BTVER2-LABEL: 'uadd'
451; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
452; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
453; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
454; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
455; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
456; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
457; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
458; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
459; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
460; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
461; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
462; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
463; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
464; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
465; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
466; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
467; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
468;
469  %I64 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
470  %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
471  %V4I64 = call {<4 x i64>, <4 x i1>} @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
472  %V8I64 = call {<8 x i64>, <8 x i1>} @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
473
474  %I32 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
475  %V4I32  = call {<4 x i32>, <4 x i1>}  @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
476  %V8I32  = call {<8 x i32>, <8 x i1>}  @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
477  %V16I32 = call {<16 x i32>, <16 x i1>} @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
478
479  %I16 = call {i16, i1} @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
480  %V8I16  = call {<8 x i16>, <8 x i1>}  @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
481  %V16I16 = call {<16 x i16>, <16 x i1>} @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
482  %V32I16 = call {<32 x i16>, <32 x i1>} @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
483
484  %I8 = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
485  %V16I8 = call {<16 x i8>, <16 x i1>} @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
486  %V32I8 = call {<32 x i8>, <32 x i1>} @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
487  %V64I8 = call {<64 x i8>, <64 x i1>} @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
488
489  ret i32 undef
490}
491
492;
493; ssub.with.overflow
494;
495
496declare {i64, i1}              @llvm.ssub.with.overflow.i64(i64, i64)
497declare {<2 x i64>, <2 x i1>}  @llvm.ssub.with.overflow.v2i64(<2 x i64>, <2 x i64>)
498declare {<4 x i64>, <4 x i1>}  @llvm.ssub.with.overflow.v4i64(<4 x i64>, <4 x i64>)
499declare {<8 x i64>, <8 x i1>}  @llvm.ssub.with.overflow.v8i64(<8 x i64>, <8 x i64>)
500
501declare {i32, i1}               @llvm.ssub.with.overflow.i32(i32, i32)
502declare {<4 x i32>, <4 x i1>}   @llvm.ssub.with.overflow.v4i32(<4 x i32>, <4 x i32>)
503declare {<8 x i32>, <8 x i1>}   @llvm.ssub.with.overflow.v8i32(<8 x i32>, <8 x i32>)
504declare {<16 x i32>, <16 x i1>} @llvm.ssub.with.overflow.v16i32(<16 x i32>, <16 x i32>)
505
506declare {i16, i1}               @llvm.ssub.with.overflow.i16(i16, i16)
507declare {<8 x i16>,  <8 x i1>}  @llvm.ssub.with.overflow.v8i16(<8 x i16>, <8 x i16>)
508declare {<16 x i16>, <16 x i1>} @llvm.ssub.with.overflow.v16i16(<16 x i16>, <16 x i16>)
509declare {<32 x i16>, <32 x i1>} @llvm.ssub.with.overflow.v32i16(<32 x i16>, <32 x i16>)
510
511declare {i8, i1}                @llvm.ssub.with.overflow.i8(i8, i8)
512declare {<16 x i8>, <16 x i1>}  @llvm.ssub.with.overflow.v16i8(<16 x i8>, <16 x i8>)
513declare {<32 x i8>, <32 x i1>}  @llvm.ssub.with.overflow.v32i8(<32 x i8>, <32 x i8>)
514declare {<64 x i8>, <64 x i1>}  @llvm.ssub.with.overflow.v64i8(<64 x i8>, <64 x i8>)
515
516define i32 @ssub(i32 %arg) {
517; SSSE3-LABEL: 'ssub'
518; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
519; SSSE3-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
520; SSSE3-NEXT:  Cost Model: Found an estimated cost of 53 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
521; SSSE3-NEXT:  Cost Model: Found an estimated cost of 103 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
522; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
523; SSSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
524; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
525; SSSE3-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
526; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
527; SSSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
528; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
529; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
530; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
531; SSSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
532; SSSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
533; SSSE3-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
534; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
535;
536; SSE42-LABEL: 'ssub'
537; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
538; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
539; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
540; SSE42-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
541; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
542; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
543; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
544; SSE42-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
545; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
546; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
547; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
548; SSE42-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
549; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
550; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
551; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
552; SSE42-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
553; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
554;
555; AVX1-LABEL: 'ssub'
556; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
557; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
558; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
559; AVX1-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
560; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
561; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
562; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
563; AVX1-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
564; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
565; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
566; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
567; AVX1-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
568; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
569; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
570; AVX1-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
571; AVX1-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
572; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
573;
574; AVX2-LABEL: 'ssub'
575; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
576; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
577; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
578; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
579; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
580; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
581; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
582; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
583; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
584; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
585; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
586; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
587; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
588; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
589; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
590; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
591; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
592;
593; AVX512F-LABEL: 'ssub'
594; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
595; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
596; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
597; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
598; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
599; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
600; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
601; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
602; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
603; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
604; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
605; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
606; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
607; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
608; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
609; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
610; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
611;
612; AVX512BW-LABEL: 'ssub'
613; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
614; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
615; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
616; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
617; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
618; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
619; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
620; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
621; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
622; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
623; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
624; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
625; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
626; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
627; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
628; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
629; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
630;
631; AVX512DQ-LABEL: 'ssub'
632; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
633; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
634; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
635; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
636; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
637; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
638; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
639; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
640; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
641; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
642; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
643; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
644; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
645; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
646; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
647; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
648; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
649;
650; SLM-LABEL: 'ssub'
651; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
652; SLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
653; SLM-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
654; SLM-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
655; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
656; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
657; SLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
658; SLM-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
659; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
660; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
661; SLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
662; SLM-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
663; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
664; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
665; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
666; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
667; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
668;
669; GLM-LABEL: 'ssub'
670; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
671; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
672; GLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
673; GLM-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
674; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
675; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
676; GLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
677; GLM-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
678; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
679; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
680; GLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
681; GLM-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
682; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
683; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
684; GLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
685; GLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
686; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
687;
688; BTVER2-LABEL: 'ssub'
689; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
690; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
691; BTVER2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
692; BTVER2-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
693; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
694; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
695; BTVER2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
696; BTVER2-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
697; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
698; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
699; BTVER2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
700; BTVER2-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
701; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
702; BTVER2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
703; BTVER2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
704; BTVER2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
705; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
706;
707  %I64 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
708  %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
709  %V4I64 = call {<4 x i64>, <4 x i1>} @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
710  %V8I64 = call {<8 x i64>, <8 x i1>} @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
711
712  %I32 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
713  %V4I32  = call {<4 x i32>, <4 x i1>}  @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
714  %V8I32  = call {<8 x i32>, <8 x i1>}  @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
715  %V16I32 = call {<16 x i32>, <16 x i1>} @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
716
717  %I16 = call {i16, i1} @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
718  %V8I16  = call {<8 x i16>, <8 x i1>}  @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
719  %V16I16 = call {<16 x i16>, <16 x i1>} @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
720  %V32I16 = call {<32 x i16>, <32 x i1>} @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
721
722  %I8 = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
723  %V16I8 = call {<16 x i8>, <16 x i1>} @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
724  %V32I8 = call {<32 x i8>, <32 x i1>} @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
725  %V64I8 = call {<64 x i8>, <64 x i1>} @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
726
727  ret i32 undef
728}
729
730;
731; usub.with.overflow
732;
733
734declare {i64, i1}              @llvm.usub.with.overflow.i64(i64, i64)
735declare {<2 x i64>, <2 x i1>}  @llvm.usub.with.overflow.v2i64(<2 x i64>, <2 x i64>)
736declare {<4 x i64>, <4 x i1>}  @llvm.usub.with.overflow.v4i64(<4 x i64>, <4 x i64>)
737declare {<8 x i64>, <8 x i1>}  @llvm.usub.with.overflow.v8i64(<8 x i64>, <8 x i64>)
738
739declare {i32, i1}               @llvm.usub.with.overflow.i32(i32, i32)
740declare {<4 x i32>, <4 x i1>}   @llvm.usub.with.overflow.v4i32(<4 x i32>, <4 x i32>)
741declare {<8 x i32>, <8 x i1>}   @llvm.usub.with.overflow.v8i32(<8 x i32>, <8 x i32>)
742declare {<16 x i32>, <16 x i1>} @llvm.usub.with.overflow.v16i32(<16 x i32>, <16 x i32>)
743
744declare {i16, i1}               @llvm.usub.with.overflow.i16(i16, i16)
745declare {<8 x i16>,  <8 x i1>}  @llvm.usub.with.overflow.v8i16(<8 x i16>, <8 x i16>)
746declare {<16 x i16>, <16 x i1>} @llvm.usub.with.overflow.v16i16(<16 x i16>, <16 x i16>)
747declare {<32 x i16>, <32 x i1>} @llvm.usub.with.overflow.v32i16(<32 x i16>, <32 x i16>)
748
749declare {i8, i1}                @llvm.usub.with.overflow.i8(i8, i8)
750declare {<16 x i8>, <16 x i1>}  @llvm.usub.with.overflow.v16i8(<16 x i8>, <16 x i8>)
751declare {<32 x i8>, <32 x i1>}  @llvm.usub.with.overflow.v32i8(<32 x i8>, <32 x i8>)
752declare {<64 x i8>, <64 x i1>}  @llvm.usub.with.overflow.v64i8(<64 x i8>, <64 x i8>)
753
754define i32 @usub(i32 %arg) {
755; SSSE3-LABEL: 'usub'
756; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
757; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
758; SSSE3-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
759; SSSE3-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
760; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
761; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
762; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
763; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
764; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
765; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
766; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
767; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
768; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
769; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
770; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
771; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
772; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
773;
774; SSE42-LABEL: 'usub'
775; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
776; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
777; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
778; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
779; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
780; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
781; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
782; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
783; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
784; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
785; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
786; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
787; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
788; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
789; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
790; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
791; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
792;
793; AVX1-LABEL: 'usub'
794; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
795; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
796; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
797; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
798; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
799; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
800; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
801; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
802; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
803; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
804; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
805; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
806; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
807; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
808; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
809; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
810; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
811;
812; AVX2-LABEL: 'usub'
813; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
814; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
815; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
816; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
817; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
818; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
819; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
820; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
821; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
822; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
823; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
824; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
825; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
826; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
827; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
828; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
829; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
830;
831; AVX512F-LABEL: 'usub'
832; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
833; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
834; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
835; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
836; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
837; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
838; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
839; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
840; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
841; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
842; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
843; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
844; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
845; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
846; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
847; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
848; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
849;
850; AVX512BW-LABEL: 'usub'
851; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
852; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
853; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
854; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
855; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
856; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
857; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
858; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
859; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
860; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
861; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
862; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
863; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
864; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
865; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
866; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
867; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
868;
869; AVX512DQ-LABEL: 'usub'
870; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
871; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
872; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
873; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
874; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
875; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
876; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
877; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
878; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
879; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
880; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
881; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
882; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
883; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
884; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
885; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
886; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
887;
888; SLM-LABEL: 'usub'
889; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
890; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
891; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
892; SLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
893; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
894; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
895; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
896; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
897; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
898; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
899; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
900; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
901; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
902; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
903; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
904; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
905; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
906;
907; GLM-LABEL: 'usub'
908; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
909; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
910; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
911; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
912; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
913; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
914; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
915; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
916; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
917; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
918; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
919; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
920; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
921; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
922; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
923; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
924; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
925;
926; BTVER2-LABEL: 'usub'
927; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
928; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
929; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
930; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
931; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
932; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
933; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
934; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
935; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
936; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
937; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
938; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
939; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
940; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
941; BTVER2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
942; BTVER2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
943; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
944;
945  %I64 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
946  %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
947  %V4I64 = call {<4 x i64>, <4 x i1>} @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
948  %V8I64 = call {<8 x i64>, <8 x i1>} @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
949
950  %I32 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
951  %V4I32  = call {<4 x i32>, <4 x i1>}  @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
952  %V8I32  = call {<8 x i32>, <8 x i1>}  @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
953  %V16I32 = call {<16 x i32>, <16 x i1>} @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
954
955  %I16 = call {i16, i1} @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
956  %V8I16  = call {<8 x i16>, <8 x i1>}  @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
957  %V16I16 = call {<16 x i16>, <16 x i1>} @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
958  %V32I16 = call {<32 x i16>, <32 x i1>} @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
959
960  %I8 = call {i8, i1} @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
961  %V16I8 = call {<16 x i8>, <16 x i1>} @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
962  %V32I8 = call {<32 x i8>, <32 x i1>} @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
963  %V64I8 = call {<64 x i8>, <64 x i1>} @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
964
965  ret i32 undef
966}
967
968;
969; smul.with.overflow
970;
971
972declare {i64, i1}              @llvm.smul.with.overflow.i64(i64, i64)
973declare {<2 x i64>, <2 x i1>}  @llvm.smul.with.overflow.v2i64(<2 x i64>, <2 x i64>)
974declare {<4 x i64>, <4 x i1>}  @llvm.smul.with.overflow.v4i64(<4 x i64>, <4 x i64>)
975declare {<8 x i64>, <8 x i1>}  @llvm.smul.with.overflow.v8i64(<8 x i64>, <8 x i64>)
976
977declare {i32, i1}               @llvm.smul.with.overflow.i32(i32, i32)
978declare {<4 x i32>, <4 x i1>}   @llvm.smul.with.overflow.v4i32(<4 x i32>, <4 x i32>)
979declare {<8 x i32>, <8 x i1>}   @llvm.smul.with.overflow.v8i32(<8 x i32>, <8 x i32>)
980declare {<16 x i32>, <16 x i1>} @llvm.smul.with.overflow.v16i32(<16 x i32>, <16 x i32>)
981
982declare {i16, i1}               @llvm.smul.with.overflow.i16(i16, i16)
983declare {<8 x i16>,  <8 x i1>}  @llvm.smul.with.overflow.v8i16(<8 x i16>, <8 x i16>)
984declare {<16 x i16>, <16 x i1>} @llvm.smul.with.overflow.v16i16(<16 x i16>, <16 x i16>)
985declare {<32 x i16>, <32 x i1>} @llvm.smul.with.overflow.v32i16(<32 x i16>, <32 x i16>)
986
987declare {i8, i1}                @llvm.smul.with.overflow.i8(i8, i8)
988declare {<16 x i8>, <16 x i1>}  @llvm.smul.with.overflow.v16i8(<16 x i8>, <16 x i8>)
989declare {<32 x i8>, <32 x i1>}  @llvm.smul.with.overflow.v32i8(<32 x i8>, <32 x i8>)
990declare {<64 x i8>, <64 x i1>}  @llvm.smul.with.overflow.v64i8(<64 x i8>, <64 x i8>)
991
992define i32 @smul(i32 %arg) {
993; SSSE3-LABEL: 'smul'
994; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
995; SSSE3-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
996; SSSE3-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
997; SSSE3-NEXT:  Cost Model: Found an estimated cost of 116 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
998; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
999; SSSE3-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1000; SSSE3-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1001; SSSE3-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
1002; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
1003; SSSE3-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
1004; SSSE3-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
1005; SSSE3-NEXT:  Cost Model: Found an estimated cost of 124 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
1006; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
1007; SSSE3-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
1008; SSSE3-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
1009; SSSE3-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
1010; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1011;
1012; SSE42-LABEL: 'smul'
1013; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
1014; SSE42-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
1015; SSE42-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
1016; SSE42-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
1017; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
1018; SSE42-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1019; SSE42-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1020; SSE42-NEXT:  Cost Model: Found an estimated cost of 116 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
1021; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
1022; SSE42-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
1023; SSE42-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
1024; SSE42-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
1025; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
1026; SSE42-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
1027; SSE42-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
1028; SSE42-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
1029; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1030;
1031; AVX1-LABEL: 'smul'
1032; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
1033; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
1034; AVX1-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
1035; AVX1-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
1036; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
1037; AVX1-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1038; AVX1-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1039; AVX1-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
1040; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
1041; AVX1-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
1042; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
1043; AVX1-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
1044; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
1045; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
1046; AVX1-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
1047; AVX1-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
1048; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1049;
1050; AVX2-LABEL: 'smul'
1051; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
1052; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
1053; AVX2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
1054; AVX2-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
1055; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
1056; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1057; AVX2-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1058; AVX2-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
1059; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
1060; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
1061; AVX2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
1062; AVX2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
1063; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
1064; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
1065; AVX2-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
1066; AVX2-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
1067; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1068;
1069; AVX512F-LABEL: 'smul'
1070; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
1071; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
1072; AVX512F-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
1073; AVX512F-NEXT:  Cost Model: Found an estimated cost of 79 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
1074; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
1075; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1076; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1077; AVX512F-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
1078; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
1079; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
1080; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
1081; AVX512F-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
1082; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
1083; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
1084; AVX512F-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
1085; AVX512F-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
1086; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1087;
1088; AVX512BW-LABEL: 'smul'
1089; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
1090; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
1091; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
1092; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 79 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
1093; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
1094; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1095; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1096; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
1097; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
1098; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
1099; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
1100; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
1101; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
1102; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
1103; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
1104; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
1105; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1106;
1107; AVX512DQ-LABEL: 'smul'
1108; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
1109; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
1110; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
1111; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 79 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
1112; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
1113; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1114; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1115; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
1116; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
1117; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
1118; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
1119; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
1120; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
1121; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
1122; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
1123; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
1124; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1125;
1126; SLM-LABEL: 'smul'
1127; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
1128; SLM-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
1129; SLM-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
1130; SLM-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
1131; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
1132; SLM-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1133; SLM-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1134; SLM-NEXT:  Cost Model: Found an estimated cost of 188 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
1135; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
1136; SLM-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
1137; SLM-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
1138; SLM-NEXT:  Cost Model: Found an estimated cost of 140 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
1139; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
1140; SLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
1141; SLM-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
1142; SLM-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
1143; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1144;
1145; GLM-LABEL: 'smul'
1146; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
1147; GLM-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
1148; GLM-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
1149; GLM-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
1150; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
1151; GLM-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1152; GLM-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1153; GLM-NEXT:  Cost Model: Found an estimated cost of 116 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
1154; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
1155; GLM-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
1156; GLM-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
1157; GLM-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
1158; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
1159; GLM-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
1160; GLM-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
1161; GLM-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
1162; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1163;
1164; BTVER2-LABEL: 'smul'
1165; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
1166; BTVER2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
1167; BTVER2-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
1168; BTVER2-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
1169; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
1170; BTVER2-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1171; BTVER2-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1172; BTVER2-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
1173; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
1174; BTVER2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
1175; BTVER2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
1176; BTVER2-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
1177; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
1178; BTVER2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
1179; BTVER2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
1180; BTVER2-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
1181; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1182;
1183  %I64 = call {i64, i1} @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
1184  %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
1185  %V4I64 = call {<4 x i64>, <4 x i1>} @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
1186  %V8I64 = call {<8 x i64>, <8 x i1>} @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
1187
1188  %I32 = call {i32, i1} @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
1189  %V4I32  = call {<4 x i32>, <4 x i1>}  @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1190  %V8I32  = call {<8 x i32>, <8 x i1>}  @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1191  %V16I32 = call {<16 x i32>, <16 x i1>} @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
1192
1193  %I16 = call {i16, i1} @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
1194  %V8I16  = call {<8 x i16>, <8 x i1>}  @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
1195  %V16I16 = call {<16 x i16>, <16 x i1>} @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
1196  %V32I16 = call {<32 x i16>, <32 x i1>} @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
1197
1198  %I8 = call {i8, i1} @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
1199  %V16I8 = call {<16 x i8>, <16 x i1>} @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
1200  %V32I8 = call {<32 x i8>, <32 x i1>} @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
1201  %V64I8 = call {<64 x i8>, <64 x i1>} @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
1202
1203  ret i32 undef
1204}
1205
1206;
1207; umul.with.overflow
1208;
1209
1210declare {i64, i1}              @llvm.umul.with.overflow.i64(i64, i64)
1211declare {<2 x i64>, <2 x i1>}  @llvm.umul.with.overflow.v2i64(<2 x i64>, <2 x i64>)
1212declare {<4 x i64>, <4 x i1>}  @llvm.umul.with.overflow.v4i64(<4 x i64>, <4 x i64>)
1213declare {<8 x i64>, <8 x i1>}  @llvm.umul.with.overflow.v8i64(<8 x i64>, <8 x i64>)
1214
1215declare {i32, i1}               @llvm.umul.with.overflow.i32(i32, i32)
1216declare {<4 x i32>, <4 x i1>}   @llvm.umul.with.overflow.v4i32(<4 x i32>, <4 x i32>)
1217declare {<8 x i32>, <8 x i1>}   @llvm.umul.with.overflow.v8i32(<8 x i32>, <8 x i32>)
1218declare {<16 x i32>, <16 x i1>} @llvm.umul.with.overflow.v16i32(<16 x i32>, <16 x i32>)
1219
1220declare {i16, i1}               @llvm.umul.with.overflow.i16(i16, i16)
1221declare {<8 x i16>,  <8 x i1>}  @llvm.umul.with.overflow.v8i16(<8 x i16>, <8 x i16>)
1222declare {<16 x i16>, <16 x i1>} @llvm.umul.with.overflow.v16i16(<16 x i16>, <16 x i16>)
1223declare {<32 x i16>, <32 x i1>} @llvm.umul.with.overflow.v32i16(<32 x i16>, <32 x i16>)
1224
1225declare {i8, i1}                @llvm.umul.with.overflow.i8(i8, i8)
1226declare {<16 x i8>, <16 x i1>}  @llvm.umul.with.overflow.v16i8(<16 x i8>, <16 x i8>)
1227declare {<32 x i8>, <32 x i1>}  @llvm.umul.with.overflow.v32i8(<32 x i8>, <32 x i8>)
1228declare {<64 x i8>, <64 x i1>}  @llvm.umul.with.overflow.v64i8(<64 x i8>, <64 x i8>)
1229
1230define i32 @umul(i32 %arg) {
1231; SSSE3-LABEL: 'umul'
1232; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
1233; SSSE3-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
1234; SSSE3-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
1235; SSSE3-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
1236; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
1237; SSSE3-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1238; SSSE3-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1239; SSSE3-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
1240; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
1241; SSSE3-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
1242; SSSE3-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
1243; SSSE3-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
1244; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
1245; SSSE3-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
1246; SSSE3-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
1247; SSSE3-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
1248; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1249;
1250; SSE42-LABEL: 'umul'
1251; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
1252; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
1253; SSE42-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
1254; SSE42-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
1255; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
1256; SSE42-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1257; SSE42-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1258; SSE42-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
1259; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
1260; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
1261; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
1262; SSE42-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
1263; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
1264; SSE42-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
1265; SSE42-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
1266; SSE42-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
1267; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1268;
1269; AVX1-LABEL: 'umul'
1270; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
1271; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
1272; AVX1-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
1273; AVX1-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
1274; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
1275; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1276; AVX1-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1277; AVX1-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
1278; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
1279; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
1280; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
1281; AVX1-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
1282; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
1283; AVX1-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
1284; AVX1-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
1285; AVX1-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
1286; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1287;
1288; AVX2-LABEL: 'umul'
1289; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
1290; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
1291; AVX2-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
1292; AVX2-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
1293; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
1294; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1295; AVX2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1296; AVX2-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
1297; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
1298; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
1299; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
1300; AVX2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
1301; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
1302; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
1303; AVX2-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
1304; AVX2-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
1305; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1306;
1307; AVX512F-LABEL: 'umul'
1308; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
1309; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
1310; AVX512F-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
1311; AVX512F-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
1312; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
1313; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1314; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1315; AVX512F-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
1316; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
1317; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
1318; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
1319; AVX512F-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
1320; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
1321; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
1322; AVX512F-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
1323; AVX512F-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
1324; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1325;
1326; AVX512BW-LABEL: 'umul'
1327; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
1328; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
1329; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
1330; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
1331; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
1332; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1333; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1334; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
1335; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
1336; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
1337; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
1338; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
1339; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
1340; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
1341; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
1342; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
1343; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1344;
1345; AVX512DQ-LABEL: 'umul'
1346; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
1347; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
1348; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
1349; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
1350; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
1351; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1352; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1353; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
1354; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
1355; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
1356; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
1357; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
1358; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
1359; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
1360; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
1361; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
1362; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1363;
1364; SLM-LABEL: 'umul'
1365; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
1366; SLM-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
1367; SLM-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
1368; SLM-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
1369; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
1370; SLM-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1371; SLM-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1372; SLM-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
1373; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
1374; SLM-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
1375; SLM-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
1376; SLM-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
1377; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
1378; SLM-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
1379; SLM-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
1380; SLM-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
1381; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1382;
1383; GLM-LABEL: 'umul'
1384; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
1385; GLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
1386; GLM-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
1387; GLM-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
1388; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
1389; GLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1390; GLM-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1391; GLM-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
1392; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
1393; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
1394; GLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
1395; GLM-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
1396; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
1397; GLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
1398; GLM-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
1399; GLM-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
1400; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1401;
1402; BTVER2-LABEL: 'umul'
1403; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
1404; BTVER2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
1405; BTVER2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
1406; BTVER2-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
1407; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
1408; BTVER2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1409; BTVER2-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1410; BTVER2-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
1411; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
1412; BTVER2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
1413; BTVER2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
1414; BTVER2-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
1415; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
1416; BTVER2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
1417; BTVER2-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
1418; BTVER2-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
1419; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
1420;
1421  %I64 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
1422  %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
1423  %V4I64 = call {<4 x i64>, <4 x i1>} @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
1424  %V8I64 = call {<8 x i64>, <8 x i1>} @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
1425
1426  %I32 = call {i32, i1} @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
1427  %V4I32  = call {<4 x i32>, <4 x i1>}  @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
1428  %V8I32  = call {<8 x i32>, <8 x i1>}  @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
1429  %V16I32 = call {<16 x i32>, <16 x i1>} @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
1430
1431  %I16 = call {i16, i1} @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
1432  %V8I16  = call {<8 x i16>, <8 x i1>}  @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
1433  %V16I16 = call {<16 x i16>, <16 x i1>} @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
1434  %V32I16 = call {<32 x i16>, <32 x i1>} @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
1435
1436  %I8 = call {i8, i1} @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
1437  %V16I8 = call {<16 x i8>, <16 x i1>} @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
1438  %V32I8 = call {<32 x i8>, <32 x i1>} @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
1439  %V64I8 = call {<64 x i8>, <64 x i1>} @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
1440
1441  ret i32 undef
1442}
1443