; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx | FileCheck %s --check-prefixes=AVX1
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512F
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefixes=AVX512DQ
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512BW
;
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mcpu=slm | FileCheck %s --check-prefixes=SLM

;
; bswap(X)
;
; Each function below applies llvm.bswap to one scalar and to 128-, 256- and
; 512-bit vectors of the same element type, and the expectations record the
; code-size cost printed by the cost-model pass for every RUN configuration.
; Vector result names encode the element count and element type of the value
; they hold (for example, %V4I32 is a <4 x i32>), so the printed instruction
; can be read without cross-referencing its type.
;

; 64-bit elements: i64, <2 x i64>, <4 x i64>, <8 x i64>.
define void @cost_bswap_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) {
; SSE2-LABEL: 'cost_bswap_i64'
; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
; SSE2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSSE3-LABEL: 'cost_bswap_i64'
; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSE42-LABEL: 'cost_bswap_i64'
; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX1-LABEL: 'cost_bswap_i64'
; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX2-LABEL: 'cost_bswap_i64'
; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512F-LABEL: 'cost_bswap_i64'
; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'cost_bswap_i64'
; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512BW-LABEL: 'cost_bswap_i64'
; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SLM-LABEL: 'cost_bswap_i64'
; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.bswap.i64(i64 %a64)
; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %I64 = call i64 @llvm.bswap.i64(i64 %a64)
  %V2I64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a128)
  %V4I64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %a256)
  %V8I64 = call <8 x i64> @llvm.bswap.v8i64(<8 x i64> %a512)
  ret void
}

; 32-bit elements: i32, <4 x i32>, <8 x i32>, <16 x i32>.
define void @cost_bswap_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) {
; SSE2-LABEL: 'cost_bswap_i32'
; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V16I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSSE3-LABEL: 'cost_bswap_i32'
; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSE42-LABEL: 'cost_bswap_i32'
; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX1-LABEL: 'cost_bswap_i32'
; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX2-LABEL: 'cost_bswap_i32'
; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512F-LABEL: 'cost_bswap_i32'
; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'cost_bswap_i32'
; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512BW-LABEL: 'cost_bswap_i32'
; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SLM-LABEL: 'cost_bswap_i32'
; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.bswap.i32(i32 %a32)
; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %I32 = call i32 @llvm.bswap.i32(i32 %a32)
  %V4I32 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a128)
  %V8I32 = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %a256)
  %V16I32 = call <16 x i32> @llvm.bswap.v16i32(<16 x i32> %a512)
  ret void
}

; 16-bit elements: i16, <8 x i16>, <16 x i16>, <32 x i16>.
define void @cost_bswap_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) {
; SSE2-LABEL: 'cost_bswap_i16'
; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSSE3-LABEL: 'cost_bswap_i16'
; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SSE42-LABEL: 'cost_bswap_i16'
; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX1-LABEL: 'cost_bswap_i16'
; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX2-LABEL: 'cost_bswap_i16'
; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512F-LABEL: 'cost_bswap_i16'
; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512DQ-LABEL: 'cost_bswap_i16'
; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; AVX512BW-LABEL: 'cost_bswap_i16'
; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SLM-LABEL: 'cost_bswap_i16'
; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.bswap.i16(i16 %a16)
; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %I16 = call i16 @llvm.bswap.i16(i16 %a16)
  %V8I16 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a128)
  %V16I16 = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %a256)
  %V32I16 = call <32 x i16> @llvm.bswap.v32i16(<32 x i16> %a512)
  ret void
}

; Intrinsic declarations, grouped by total width: scalar, then 128-, 256- and
; 512-bit vectors.
declare i64 @llvm.bswap.i64(i64)
declare i32 @llvm.bswap.i32(i32)
declare i16 @llvm.bswap.i16(i16)

declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)

declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>)
declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>)
declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>)

declare <8 x i64> @llvm.bswap.v8i64(<8 x i64>)
declare <16 x i32> @llvm.bswap.v16i32(<16 x i32>)
declare <32 x i16> @llvm.bswap.v32i16(<32 x i16>)