1; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py 2; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE 3; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s -check-prefixes=SSE2 4; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse3 | FileCheck %s -check-prefixes=SSE3 5; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse3 | FileCheck %s -check-prefixes=AVX 6; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse3 | FileCheck %s -check-prefixes=AVX2 7; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse3 | FileCheck %s -check-prefixes=AVX512 8; FIXME: The SSE and SSE2 prefixes both run with -mattr=+sse2, and the AVX, AVX2 and AVX512 prefixes all run with -mattr=+sse3, so these prefixes do not exercise distinct feature sets. Presumably each invocation was meant to enable its own feature level (to be confirmed); if the -mattr values are changed, regenerate the assertions with utils/update_analyze_test_checks.py. 9; This test checks that the cost of a splat-load shuffle is correctly detected. 10; If there is a combined load+broadcast instruction, such as `movddup`, the 11; cost of the shuffle should be 0. 12; 13; TODO: AVX `vbroadcast*` seems to support more types than the 14; 2xdouble type of `movddup`: 15; - `vbroadcastss` supports 4xfloat, 8xfloat 16; - `vbroadcastsd` supports 4xdouble 17 18; NOTE: The code in this test is a hack. Since TTI cannot currently detect a 19; proper broadcast pattern from a scalar load (like the one that follows), 20; we use a vector load as the shuffle's operand to trigger the pattern. 
21; 22; %load = load double, double *%ptr 23; %insert = insertelement <2 x double> poison, double %load, i32 0 24; %bcast = shufflevector <2 x double> %insert, <2 x double> poison, <2 x i32> zeroinitializer 25 26define void @shuffle_load() { 27; SSE-LABEL: 'shuffle_load' 28; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2 29; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer 30; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4 31; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer 32; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 33; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer 34; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 35; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer 36; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 37; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer 38; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 39; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer 40; SSE-NEXT: Cost Model: Found an estimated cost of 
4 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 41; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer 42; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 43; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer 44; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 45; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer 46; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 47; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer 48; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 49; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer 50; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 51; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer 52; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 53; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer 54; SSE-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32 55; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer 56; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64 57; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer 58; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16 59; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer 60; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32 61; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer 62; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64 63; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer 64; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4 65; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer 66; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8 67; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer 68; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: 
%ld_8xf16 = load <8 x half>, ptr undef, align 16 69; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer 70; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32 71; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer 72; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64 73; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer 74; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8 75; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer 76; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16 77; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer 78; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32 79; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer 80; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64 81; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer 82; SSE-NEXT: Cost Model: Found an estimated 
cost of 4 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16 83; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer 84; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32 85; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer 86; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64 87; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer 88; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void 89; 90; SSE2-LABEL: 'shuffle_load' 91; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2 92; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer 93; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4 94; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer 95; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 96; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer 97; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 98; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 
x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer 99; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 100; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer 101; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 102; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer 103; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 104; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer 105; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 106; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer 107; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 108; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer 109; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 110; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer 111; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 112; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = 
shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer 113; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 114; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer 115; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 116; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer 117; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32 118; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer 119; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64 120; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer 121; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16 122; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer 123; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32 124; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer 125; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64 126; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer 127; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4 128; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer 129; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8 130; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer 131; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16 132; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer 133; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32 134; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer 135; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64 136; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer 137; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8 138; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer 139; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16 140; SSE2-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer 141; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32 142; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer 143; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64 144; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer 145; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16 146; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer 147; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32 148; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer 149; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64 150; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer 151; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void 152; 153; SSE3-LABEL: 'shuffle_load' 154; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2 155; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> 
zeroinitializer 156; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4 157; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer 158; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 159; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer 160; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 161; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer 162; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 163; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer 164; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 165; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer 166; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 167; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer 168; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 169; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> 
zeroinitializer 170; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 171; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer 172; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 173; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer 174; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 175; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer 176; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 177; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer 178; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 179; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer 180; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32 181; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer 182; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64 183; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x 
i32> undef, <16 x i32> zeroinitializer 184; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16 185; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer 186; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32 187; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer 188; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64 189; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer 190; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4 191; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer 192; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8 193; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer 194; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16 195; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer 196; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32 197; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> 
%ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer 198; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64 199; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer 200; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8 201; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer 202; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16 203; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer 204; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32 205; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer 206; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64 207; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer 208; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16 209; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer 210; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32 211; SSE3-NEXT: Cost Model: Found an estimated cost 
of 0 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer 212; SSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64 213; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer 214; SSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void 215; 216; AVX-LABEL: 'shuffle_load' 217; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2 218; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer 219; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4 220; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer 221; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 222; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer 223; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 224; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer 225; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 226; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer 227; AVX-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 228; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer 229; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 230; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer 231; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 232; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer 233; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 234; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer 235; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 236; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer 237; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 238; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer 239; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 240; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer 241; AVX-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 242; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer 243; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32 244; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer 245; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64 246; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer 247; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16 248; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer 249; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32 250; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer 251; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64 252; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer 253; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4 254; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer 255; AVX-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8 256; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer 257; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16 258; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer 259; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32 260; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer 261; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64 262; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer 263; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8 264; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer 265; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16 266; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer 267; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32 268; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer 269; AVX-NEXT: Cost Model: 
Found an estimated cost of 4 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64 270; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer 271; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16 272; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer 273; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32 274; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer 275; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64 276; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer 277; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void 278; 279; AVX2-LABEL: 'shuffle_load' 280; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2 281; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer 282; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4 283; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer 284; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 285; AVX2-NEXT: Cost Model: Found an estimated cost of 
2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer 286; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 287; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer 288; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 289; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer 290; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 291; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer 292; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 293; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer 294; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 295; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer 296; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 297; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer 298; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 299; AVX2-NEXT: Cost Model: Found an estimated cost 
of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer 300; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 301; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer 302; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 303; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer 304; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 305; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer 306; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32 307; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer 308; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64 309; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer 310; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16 311; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer 312; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32 313; AVX2-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer 314; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64 315; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer 316; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4 317; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer 318; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8 319; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer 320; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 16 321; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer 322; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32 323; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer 324; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64 325; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer 326; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 
8 327; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer 328; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16 329; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer 330; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32 331; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer 332; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64 333; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer 334; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16 335; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer 336; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32 337; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer 338; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64 339; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer 340; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: ret void 341; 342; AVX512-LABEL: 'shuffle_load' 343; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi8 = load <2 x i8>, ptr undef, align 2 344; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer 345; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi8 = load <4 x i8>, ptr undef, align 4 346; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer 347; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi8 = load <8 x i8>, ptr undef, align 8 348; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer 349; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi8 = load <16 x i8>, ptr undef, align 16 350; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer 351; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi8 = load <32 x i8>, ptr undef, align 32 352; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer 353; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_64xi8 = load <64 x i8>, ptr undef, align 64 354; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer 355; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi16 = load <2 x i16>, ptr undef, align 4 356; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi16 = 
shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer 357; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi16 = load <4 x i16>, ptr undef, align 8 358; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, <4 x i32> zeroinitializer 359; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi16 = load <8 x i16>, ptr undef, align 16 360; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer 361; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi16 = load <16 x i16>, ptr undef, align 32 362; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer 363; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xi16 = load <32 x i16>, ptr undef, align 64 364; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer 365; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi32 = load <2 x i32>, ptr undef, align 8 366; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer 367; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi32 = load <4 x i32>, ptr undef, align 16 368; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer 369; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi32 = load <8 x i32>, ptr undef, align 32 370; AVX512-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer 371; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xi32 = load <16 x i32>, ptr undef, align 64 372; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer 373; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xi64 = load <2 x i64>, ptr undef, align 16 374; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer 375; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xi64 = load <4 x i64>, ptr undef, align 32 376; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer 377; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xi64 = load <8 x i64>, ptr undef, align 64 378; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer 379; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf16 = load <2 x half>, ptr undef, align 4 380; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer 381; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf16 = load <4 x half>, ptr undef, align 8 382; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer 383; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf16 = load <8 x half>, ptr undef, align 
16 384; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer 385; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf16 = load <16 x half>, ptr undef, align 32 386; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer 387; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_32xf16 = load <32 x half>, ptr undef, align 64 388; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer 389; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_2xf32 = load <2 x float>, ptr undef, align 8 390; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer 391; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf32 = load <4 x float>, ptr undef, align 16 392; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer 393; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf32 = load <8 x float>, ptr undef, align 32 394; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer 395; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_16xf32 = load <16 x float>, ptr undef, align 64 396; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer 397; AVX512-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: %ld_2xf64 = load <2 x double>, ptr undef, align 16 398; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer 399; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_4xf64 = load <4 x double>, ptr undef, align 32 400; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer 401; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %ld_8xf64 = load <8 x double>, ptr undef, align 64 402; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer 403; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void 404; 405 %ld_2xi8 = load <2 x i8>, ptr undef 406 %sf_2xi8 = shufflevector <2 x i8> %ld_2xi8, <2 x i8> undef, <2 x i32> zeroinitializer 407 %ld_4xi8 = load <4 x i8>, ptr undef 408 %sf_4xi8 = shufflevector <4 x i8> %ld_4xi8, <4 x i8> undef, <4 x i32> zeroinitializer 409 %ld_8xi8 = load <8 x i8>, ptr undef 410 %sf_8xi8 = shufflevector <8 x i8> %ld_8xi8, <8 x i8> undef, <8 x i32> zeroinitializer 411 %ld_16xi8 = load <16 x i8>, ptr undef 412 %sf_16xi8 = shufflevector <16 x i8> %ld_16xi8, <16 x i8> undef, <16 x i32> zeroinitializer 413 %ld_32xi8 = load <32 x i8>, ptr undef 414 %sf_32xi8 = shufflevector <32 x i8> %ld_32xi8, <32 x i8> undef, <32 x i32> zeroinitializer 415 %ld_64xi8 = load <64 x i8>, ptr undef 416 %sf_64xi8 = shufflevector <64 x i8> %ld_64xi8, <64 x i8> undef, <64 x i32> zeroinitializer 417 418 %ld_2xi16 = load <2 x i16>, ptr undef 419 %sf_2xi16 = shufflevector <2 x i16> %ld_2xi16, <2 x i16> undef, <2 x i32> zeroinitializer 420 %ld_4xi16 = load <4 x i16>, ptr undef 421 %sf_4xi16 = shufflevector <4 x i16> %ld_4xi16, <4 x i16> undef, 
<4 x i32> zeroinitializer 422 %ld_8xi16 = load <8 x i16>, ptr undef 423 %sf_8xi16 = shufflevector <8 x i16> %ld_8xi16, <8 x i16> undef, <8 x i32> zeroinitializer 424 %ld_16xi16 = load <16 x i16>, ptr undef 425 %sf_16xi16 = shufflevector <16 x i16> %ld_16xi16, <16 x i16> undef, <16 x i32> zeroinitializer 426 %ld_32xi16 = load <32 x i16>, ptr undef 427 %sf_32xi16 = shufflevector <32 x i16> %ld_32xi16, <32 x i16> undef, <32 x i32> zeroinitializer 428 429 %ld_2xi32 = load <2 x i32>, ptr undef 430 %sf_2xi32 = shufflevector <2 x i32> %ld_2xi32, <2 x i32> undef, <2 x i32> zeroinitializer 431 %ld_4xi32 = load <4 x i32>, ptr undef 432 %sf_4xi32 = shufflevector <4 x i32> %ld_4xi32, <4 x i32> undef, <4 x i32> zeroinitializer 433 %ld_8xi32 = load <8 x i32>, ptr undef 434 %sf_8xi32 = shufflevector <8 x i32> %ld_8xi32, <8 x i32> undef, <8 x i32> zeroinitializer 435 %ld_16xi32 = load <16 x i32>, ptr undef 436 %sf_16xi32 = shufflevector <16 x i32> %ld_16xi32, <16 x i32> undef, <16 x i32> zeroinitializer 437 438 %ld_2xi64 = load <2 x i64>, ptr undef 439 %sf_2xi64 = shufflevector <2 x i64> %ld_2xi64, <2 x i64> undef, <2 x i32> zeroinitializer 440 %ld_4xi64 = load <4 x i64>, ptr undef 441 %sf_4xi64 = shufflevector <4 x i64> %ld_4xi64, <4 x i64> undef, <4 x i32> zeroinitializer 442 %ld_8xi64 = load <8 x i64>, ptr undef 443 %sf_8xi64 = shufflevector <8 x i64> %ld_8xi64, <8 x i64> undef, <8 x i32> zeroinitializer 444 445 %ld_2xf16 = load <2 x half>, ptr undef 446 %sf_2xf16 = shufflevector <2 x half> %ld_2xf16, <2 x half> undef, <2 x i32> zeroinitializer 447 %ld_4xf16 = load <4 x half>, ptr undef 448 %sf_4xf16 = shufflevector <4 x half> %ld_4xf16, <4 x half> undef, <4 x i32> zeroinitializer 449 %ld_8xf16 = load <8 x half>, ptr undef 450 %sf_8xf16 = shufflevector <8 x half> %ld_8xf16, <8 x half> undef, <8 x i32> zeroinitializer 451 %ld_16xf16 = load <16 x half>, ptr undef 452 %sf_16xf16 = shufflevector <16 x half> %ld_16xf16, <16 x half> undef, <16 x i32> zeroinitializer 453 %ld_32xf16 = 
load <32 x half>, ptr undef 454 %sf_32xf16 = shufflevector <32 x half> %ld_32xf16, <32 x half> undef, <32 x i32> zeroinitializer 455 456 %ld_2xf32 = load <2 x float>, ptr undef 457 %sf_2xf32 = shufflevector <2 x float> %ld_2xf32, <2 x float> undef, <2 x i32> zeroinitializer 458 %ld_4xf32 = load <4 x float>, ptr undef 459 %sf_4xf32 = shufflevector <4 x float> %ld_4xf32, <4 x float> undef, <4 x i32> zeroinitializer 460 %ld_8xf32 = load <8 x float>, ptr undef 461 %sf_8xf32 = shufflevector <8 x float> %ld_8xf32, <8 x float> undef, <8 x i32> zeroinitializer 462 %ld_16xf32 = load <16 x float>, ptr undef 463 %sf_16xf32 = shufflevector <16 x float> %ld_16xf32, <16 x float> undef, <16 x i32> zeroinitializer 464 465 %ld_2xf64 = load <2 x double>, ptr undef 466 %sf_2xf64 = shufflevector <2 x double> %ld_2xf64, <2 x double> undef, <2 x i32> zeroinitializer 467 %ld_4xf64 = load <4 x double>, ptr undef 468 %sf_4xf64 = shufflevector <4 x double> %ld_4xf64, <4 x double> undef, <4 x i32> zeroinitializer 469 %ld_8xf64 = load <8 x double>, ptr undef 470 %sf_8xf64 = shufflevector <8 x double> %ld_8xf64, <8 x double> undef, <8 x i32> zeroinitializer 471 472 ret void 473} 474