1; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py 2; RUN: opt < %s -mtriple=aarch64--linux-gnu -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s 3; RUN: opt < %s -mtriple=aarch64--linux-gnu -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=CODESIZE 4 5; These tests check the costs of ld1r instructions, through the 6; isLegalBroadcastLoad method. 7 8target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" 9 10; The @shuffle test uses vector loads and splats, as opposed to scalar loads, inserts 11; and splats (covered by the ld1r_* tests), as that is how getShuffleCost currently recognizes them. 12define void @shuffle() { 13; CHECK-LABEL: 'shuffle' 14; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %lv2i8 = load <2 x i8>, ptr undef, align 2 15; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sv2i8 = shufflevector <2 x i8> %lv2i8, <2 x i8> undef, <2 x i32> zeroinitializer 16; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %lv4i8 = load <4 x i8>, ptr undef, align 4 17; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sv4i8 = shufflevector <4 x i8> %lv4i8, <4 x i8> undef, <4 x i32> zeroinitializer 18; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv8i8 = load <8 x i8>, ptr undef, align 8 19; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sv8i8 = shufflevector <8 x i8> %lv8i8, <8 x i8> undef, <8 x i32> zeroinitializer 20; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv16i8 = load <16 x i8>, ptr undef, align 16 21; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sv16i8 = shufflevector <16 x i8> %lv16i8, <16 x i8> undef, <16 x i32> zeroinitializer 22; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %lv2i16 = load <2 x i16>, ptr undef, align 4 23; CHECK-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %sv2i16 = shufflevector <2 x i16> %lv2i16, <2 x i16> undef, <2 x i32> zeroinitializer 24; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv4i16 = load <4 x i16>, ptr undef, align 8 25; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sv4i16 = shufflevector <4 x i16> %lv4i16, <4 x i16> undef, <4 x i32> zeroinitializer 26; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv8i16 = load <8 x i16>, ptr undef, align 16 27; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sv8i16 = shufflevector <8 x i16> %lv8i16, <8 x i16> undef, <8 x i32> zeroinitializer 28; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %lv16i16 = load <16 x i16>, ptr undef, align 32 29; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sv16i16 = shufflevector <16 x i16> %lv16i16, <16 x i16> undef, <16 x i32> zeroinitializer 30; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv2i32 = load <2 x i32>, ptr undef, align 8 31; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sv2i32 = shufflevector <2 x i32> %lv2i32, <2 x i32> undef, <2 x i32> zeroinitializer 32; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv4i32 = load <4 x i32>, ptr undef, align 16 33; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sv4i32 = shufflevector <4 x i32> %lv4i32, <4 x i32> undef, <4 x i32> zeroinitializer 34; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %lv8i32 = load <8 x i32>, ptr undef, align 32 35; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sv8i32 = shufflevector <8 x i32> %lv8i32, <8 x i32> undef, <8 x i32> zeroinitializer 36; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv2i64 = load <2 x i64>, ptr undef, align 16 37; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sv2i64 = 
shufflevector <2 x i64> %lv2i64, <2 x i64> undef, <2 x i32> zeroinitializer 38; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %lv4i64 = load <4 x i64>, ptr undef, align 32 39; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sv4i64 = shufflevector <4 x i64> %lv4i64, <4 x i64> undef, <4 x i32> zeroinitializer 40; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv2f16 = load <2 x half>, ptr undef, align 4 41; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sv2f16 = shufflevector <2 x half> %lv2f16, <2 x half> undef, <2 x i32> zeroinitializer 42; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv4f16 = load <4 x half>, ptr undef, align 8 43; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sv4f16 = shufflevector <4 x half> %lv4f16, <4 x half> undef, <4 x i32> zeroinitializer 44; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv8f16 = load <8 x half>, ptr undef, align 16 45; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sv8f16 = shufflevector <8 x half> %lv8f16, <8 x half> undef, <8 x i32> zeroinitializer 46; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %lv16f16 = load <16 x half>, ptr undef, align 32 47; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sv16f16 = shufflevector <16 x half> %lv16f16, <16 x half> undef, <16 x i32> zeroinitializer 48; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv2f32 = load <2 x float>, ptr undef, align 8 49; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sv2f32 = shufflevector <2 x float> %lv2f32, <2 x float> undef, <2 x i32> zeroinitializer 50; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv4f32 = load <4 x float>, ptr undef, align 16 51; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sv4f32 = shufflevector 
<4 x float> %lv4f32, <4 x float> undef, <4 x i32> zeroinitializer 52; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %lv8f32 = load <8 x float>, ptr undef, align 32 53; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sv8f32 = shufflevector <8 x float> %lv8f32, <8 x float> undef, <8 x i32> zeroinitializer 54; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv2f64 = load <2 x double>, ptr undef, align 16 55; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sv2f64 = shufflevector <2 x double> %lv2f64, <2 x double> undef, <2 x i32> zeroinitializer 56; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %lv4f64 = load <4 x double>, ptr undef, align 32 57; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sv4f64 = shufflevector <4 x double> %lv4f64, <4 x double> undef, <4 x i32> zeroinitializer 58; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void 59; 60; CODESIZE-LABEL: 'shuffle' 61; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv2i8 = load <2 x i8>, ptr undef, align 2 62; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sv2i8 = shufflevector <2 x i8> %lv2i8, <2 x i8> undef, <2 x i32> zeroinitializer 63; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv4i8 = load <4 x i8>, ptr undef, align 4 64; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sv4i8 = shufflevector <4 x i8> %lv4i8, <4 x i8> undef, <4 x i32> zeroinitializer 65; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv8i8 = load <8 x i8>, ptr undef, align 8 66; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv8i8 = shufflevector <8 x i8> %lv8i8, <8 x i8> undef, <8 x i32> zeroinitializer 67; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv16i8 = load <16 x i8>, ptr undef, 
align 16 68; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv16i8 = shufflevector <16 x i8> %lv16i8, <16 x i8> undef, <16 x i32> zeroinitializer 69; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv2i16 = load <2 x i16>, ptr undef, align 4 70; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sv2i16 = shufflevector <2 x i16> %lv2i16, <2 x i16> undef, <2 x i32> zeroinitializer 71; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv4i16 = load <4 x i16>, ptr undef, align 8 72; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv4i16 = shufflevector <4 x i16> %lv4i16, <4 x i16> undef, <4 x i32> zeroinitializer 73; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv8i16 = load <8 x i16>, ptr undef, align 16 74; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv8i16 = shufflevector <8 x i16> %lv8i16, <8 x i16> undef, <8 x i32> zeroinitializer 75; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %lv16i16 = load <16 x i16>, ptr undef, align 32 76; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv16i16 = shufflevector <16 x i16> %lv16i16, <16 x i16> undef, <16 x i32> zeroinitializer 77; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv2i32 = load <2 x i32>, ptr undef, align 8 78; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv2i32 = shufflevector <2 x i32> %lv2i32, <2 x i32> undef, <2 x i32> zeroinitializer 79; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv4i32 = load <4 x i32>, ptr undef, align 16 80; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv4i32 = shufflevector <4 x i32> %lv4i32, <4 x i32> undef, <4 x i32> zeroinitializer 81; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %lv8i32 = load <8 x i32>, ptr 
undef, align 32 82; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv8i32 = shufflevector <8 x i32> %lv8i32, <8 x i32> undef, <8 x i32> zeroinitializer 83; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv2i64 = load <2 x i64>, ptr undef, align 16 84; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv2i64 = shufflevector <2 x i64> %lv2i64, <2 x i64> undef, <2 x i32> zeroinitializer 85; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %lv4i64 = load <4 x i64>, ptr undef, align 32 86; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv4i64 = shufflevector <4 x i64> %lv4i64, <4 x i64> undef, <4 x i32> zeroinitializer 87; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv2f16 = load <2 x half>, ptr undef, align 4 88; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv2f16 = shufflevector <2 x half> %lv2f16, <2 x half> undef, <2 x i32> zeroinitializer 89; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv4f16 = load <4 x half>, ptr undef, align 8 90; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv4f16 = shufflevector <4 x half> %lv4f16, <4 x half> undef, <4 x i32> zeroinitializer 91; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv8f16 = load <8 x half>, ptr undef, align 16 92; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv8f16 = shufflevector <8 x half> %lv8f16, <8 x half> undef, <8 x i32> zeroinitializer 93; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %lv16f16 = load <16 x half>, ptr undef, align 32 94; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv16f16 = shufflevector <16 x half> %lv16f16, <16 x half> undef, <16 x i32> zeroinitializer 95; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv2f32 = 
load <2 x float>, ptr undef, align 8 96; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv2f32 = shufflevector <2 x float> %lv2f32, <2 x float> undef, <2 x i32> zeroinitializer 97; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv4f32 = load <4 x float>, ptr undef, align 16 98; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv4f32 = shufflevector <4 x float> %lv4f32, <4 x float> undef, <4 x i32> zeroinitializer 99; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %lv8f32 = load <8 x float>, ptr undef, align 32 100; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv8f32 = shufflevector <8 x float> %lv8f32, <8 x float> undef, <8 x i32> zeroinitializer 101; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lv2f64 = load <2 x double>, ptr undef, align 16 102; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv2f64 = shufflevector <2 x double> %lv2f64, <2 x double> undef, <2 x i32> zeroinitializer 103; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %lv4f64 = load <4 x double>, ptr undef, align 32 104; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %sv4f64 = shufflevector <4 x double> %lv4f64, <4 x double> undef, <4 x i32> zeroinitializer 105; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void 106; 107 %lv2i8 = load <2 x i8>, ptr undef 108 %sv2i8 = shufflevector <2 x i8> %lv2i8, <2 x i8> undef, <2 x i32> zeroinitializer 109 %lv4i8 = load <4 x i8>, ptr undef 110 %sv4i8 = shufflevector <4 x i8> %lv4i8, <4 x i8> undef, <4 x i32> zeroinitializer 111 %lv8i8 = load <8 x i8>, ptr undef 112 %sv8i8 = shufflevector <8 x i8> %lv8i8, <8 x i8> undef, <8 x i32> zeroinitializer 113 %lv16i8 = load <16 x i8>, ptr undef 114 %sv16i8 = shufflevector <16 x i8> %lv16i8, <16 x i8> undef, <16 x i32> zeroinitializer 115 116 %lv2i16 = 
load <2 x i16>, ptr undef 117 %sv2i16 = shufflevector <2 x i16> %lv2i16, <2 x i16> undef, <2 x i32> zeroinitializer 118 %lv4i16 = load <4 x i16>, ptr undef 119 %sv4i16 = shufflevector <4 x i16> %lv4i16, <4 x i16> undef, <4 x i32> zeroinitializer 120 %lv8i16 = load <8 x i16>, ptr undef 121 %sv8i16 = shufflevector <8 x i16> %lv8i16, <8 x i16> undef, <8 x i32> zeroinitializer 122 %lv16i16 = load <16 x i16>, ptr undef 123 %sv16i16 = shufflevector <16 x i16> %lv16i16, <16 x i16> undef, <16 x i32> zeroinitializer 124 125 %lv2i32 = load <2 x i32>, ptr undef 126 %sv2i32 = shufflevector <2 x i32> %lv2i32, <2 x i32> undef, <2 x i32> zeroinitializer 127 %lv4i32 = load <4 x i32>, ptr undef 128 %sv4i32 = shufflevector <4 x i32> %lv4i32, <4 x i32> undef, <4 x i32> zeroinitializer 129 %lv8i32 = load <8 x i32>, ptr undef 130 %sv8i32 = shufflevector <8 x i32> %lv8i32, <8 x i32> undef, <8 x i32> zeroinitializer 131 132 %lv2i64 = load <2 x i64>, ptr undef 133 %sv2i64 = shufflevector <2 x i64> %lv2i64, <2 x i64> undef, <2 x i32> zeroinitializer 134 %lv4i64 = load <4 x i64>, ptr undef 135 %sv4i64 = shufflevector <4 x i64> %lv4i64, <4 x i64> undef, <4 x i32> zeroinitializer 136 137 %lv2f16 = load <2 x half>, ptr undef 138 %sv2f16 = shufflevector <2 x half> %lv2f16, <2 x half> undef, <2 x i32> zeroinitializer 139 %lv4f16 = load <4 x half>, ptr undef 140 %sv4f16 = shufflevector <4 x half> %lv4f16, <4 x half> undef, <4 x i32> zeroinitializer 141 %lv8f16 = load <8 x half>, ptr undef 142 %sv8f16 = shufflevector <8 x half> %lv8f16, <8 x half> undef, <8 x i32> zeroinitializer 143 %lv16f16 = load <16 x half>, ptr undef 144 %sv16f16 = shufflevector <16 x half> %lv16f16, <16 x half> undef, <16 x i32> zeroinitializer 145 146 %lv2f32 = load <2 x float>, ptr undef 147 %sv2f32 = shufflevector <2 x float> %lv2f32, <2 x float> undef, <2 x i32> zeroinitializer 148 %lv4f32 = load <4 x float>, ptr undef 149 %sv4f32 = shufflevector <4 x float> %lv4f32, <4 x float> undef, <4 x i32> zeroinitializer 150 
%lv8f32 = load <8 x float>, ptr undef 151 %sv8f32 = shufflevector <8 x float> %lv8f32, <8 x float> undef, <8 x i32> zeroinitializer 152 153 %lv2f64 = load <2 x double>, ptr undef 154 %sv2f64 = shufflevector <2 x double> %lv2f64, <2 x double> undef, <2 x i32> zeroinitializer 155 %lv4f64 = load <4 x double>, ptr undef 156 %sv4f64 = shufflevector <4 x double> %lv4f64, <4 x double> undef, <4 x i32> zeroinitializer 157 158 ret void 159} 160 161; Check the cost of ld1r patterns built from a scalar FP load, an insertelement into lane 0 and a splat shufflevector 162 163define <4 x half> @ld1r_4h_float_shuff(ptr nocapture %x) { 164; CHECK-LABEL: 'ld1r_4h_float_shuff' 165; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp = load half, ptr %x, align 2 166; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = insertelement <4 x half> undef, half %tmp, i32 0 167; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lane = shufflevector <4 x half> %tmp1, <4 x half> undef, <4 x i32> zeroinitializer 168; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x half> %lane 169; 170; CODESIZE-LABEL: 'ld1r_4h_float_shuff' 171; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp = load half, ptr %x, align 2 172; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = insertelement <4 x half> undef, half %tmp, i32 0 173; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lane = shufflevector <4 x half> %tmp1, <4 x half> undef, <4 x i32> zeroinitializer 174; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x half> %lane 175; 176entry: 177 %tmp = load half, ptr %x, align 2 178 %tmp1 = insertelement <4 x half> undef, half %tmp, i32 0 179 %lane = shufflevector <4 x half> %tmp1, <4 x half> undef, <4 x i32> zeroinitializer 180 ret <4 x half> %lane 181} 182 183define <8 x half> @ld1r_8h_float_shuff(ptr nocapture %x) { 184; CHECK-LABEL: 'ld1r_8h_float_shuff' 185; CHECK-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %tmp = load half, ptr %x, align 2 186; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = insertelement <8 x half> undef, half %tmp, i32 0 187; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lane = shufflevector <8 x half> %tmp1, <8 x half> undef, <8 x i32> zeroinitializer 188; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x half> %lane 189; 190; CODESIZE-LABEL: 'ld1r_8h_float_shuff' 191; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp = load half, ptr %x, align 2 192; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = insertelement <8 x half> undef, half %tmp, i32 0 193; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lane = shufflevector <8 x half> %tmp1, <8 x half> undef, <8 x i32> zeroinitializer 194; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x half> %lane 195; 196entry: 197 %tmp = load half, ptr %x, align 2 198 %tmp1 = insertelement <8 x half> undef, half %tmp, i32 0 199 %lane = shufflevector <8 x half> %tmp1, <8 x half> undef, <8 x i32> zeroinitializer 200 ret <8 x half> %lane 201} 202 203define <2 x float> @ld1r_2s_float_shuff(ptr nocapture %x) { 204; CHECK-LABEL: 'ld1r_2s_float_shuff' 205; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp = load float, ptr %x, align 4 206; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = insertelement <2 x float> undef, float %tmp, i32 0 207; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lane = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer 208; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x float> %lane 209; 210; CODESIZE-LABEL: 'ld1r_2s_float_shuff' 211; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %tmp = load float, ptr %x, align 4 212; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = insertelement <2 x float> undef, float %tmp, i32 0 213; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lane = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer 214; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x float> %lane 215; 216entry: 217 %tmp = load float, ptr %x, align 4 218 %tmp1 = insertelement <2 x float> undef, float %tmp, i32 0 219 %lane = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer 220 ret <2 x float> %lane 221} 222 223define <4 x float> @ld1r_4s_float_shuff(ptr nocapture %x) { 224; CHECK-LABEL: 'ld1r_4s_float_shuff' 225; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp = load float, ptr %x, align 4 226; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = insertelement <4 x float> undef, float %tmp, i32 0 227; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lane = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer 228; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %lane 229; 230; CODESIZE-LABEL: 'ld1r_4s_float_shuff' 231; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp = load float, ptr %x, align 4 232; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = insertelement <4 x float> undef, float %tmp, i32 0 233; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lane = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer 234; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %lane 235; 236entry: 237 %tmp = load float, ptr %x, align 4 238 %tmp1 = insertelement <4 x float> undef, float %tmp, i32 0 239 %lane = 
shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer 240 ret <4 x float> %lane 241} 242; NOTE(review): the double load in ld1r_2d_double_shuff is only 4-byte aligned (align 4), while the i64 load in ld1r_2d_int_shuff uses align 8 - confirm this is intentional. 243define <2 x double> @ld1r_2d_double_shuff(ptr nocapture %x) { 244; CHECK-LABEL: 'ld1r_2d_double_shuff' 245; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp = load double, ptr %x, align 4 246; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = insertelement <2 x double> undef, double %tmp, i32 0 247; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lane = shufflevector <2 x double> %tmp1, <2 x double> undef, <2 x i32> zeroinitializer 248; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x double> %lane 249; 250; CODESIZE-LABEL: 'ld1r_2d_double_shuff' 251; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp = load double, ptr %x, align 4 252; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = insertelement <2 x double> undef, double %tmp, i32 0 253; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lane = shufflevector <2 x double> %tmp1, <2 x double> undef, <2 x i32> zeroinitializer 254; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %lane 255; 256entry: 257 %tmp = load double, ptr %x, align 4 258 %tmp1 = insertelement <2 x double> undef, double %tmp, i32 0 259 %lane = shufflevector <2 x double> %tmp1, <2 x double> undef, <2 x i32> zeroinitializer 260 ret <2 x double> %lane 261} 262 263; Check the cost of ld1r patterns built from a scalar integer load, an insertelement into lane 0 and a splat shufflevector 264 265define <8 x i8> @ld1r_8b_int_shuff(ptr nocapture %x) { 266; CHECK-LABEL: 'ld1r_8b_int_shuff' 267; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp = load i8, ptr %x, align 2 268; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %tmp1 = insertelement <8 x i8> undef, i8 %tmp, i8 0 269; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %lane = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer 270; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i8> %lane 271; 272; CODESIZE-LABEL: 'ld1r_8b_int_shuff' 273; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp = load i8, ptr %x, align 2 274; CODESIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %tmp1 = insertelement <8 x i8> undef, i8 %tmp, i8 0 275; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lane = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer 276; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i8> %lane 277; 278entry: 279 %tmp = load i8, ptr %x, align 2 280 %tmp1 = insertelement <8 x i8> undef, i8 %tmp, i8 0 281 %lane = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer 282 ret <8 x i8> %lane 283} 284 285define <16 x i8> @ld1r_16b_int_shuff(ptr nocapture %x) { 286; CHECK-LABEL: 'ld1r_16b_int_shuff' 287; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp = load i8, ptr %x, align 2 288; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %tmp1 = insertelement <16 x i8> undef, i8 %tmp, i8 0 289; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lane = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> zeroinitializer 290; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %lane 291; 292; CODESIZE-LABEL: 'ld1r_16b_int_shuff' 293; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp = load i8, ptr %x, align 2 294; CODESIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %tmp1 = insertelement <16 x i8> undef, i8 %tmp, i8 0 295; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lane = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> zeroinitializer 296; 
CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %lane 297; 298entry: 299 %tmp = load i8, ptr %x, align 2 300 %tmp1 = insertelement <16 x i8> undef, i8 %tmp, i8 0 301 %lane = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> zeroinitializer 302 ret <16 x i8> %lane 303} 304 305define <4 x i16> @ld1r_4h_int_shuff(ptr nocapture %x) { 306; CHECK-LABEL: 'ld1r_4h_int_shuff' 307; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp = load i16, ptr %x, align 2 308; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %tmp1 = insertelement <4 x i16> undef, i16 %tmp, i16 0 309; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lane = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer 310; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i16> %lane 311; 312; CODESIZE-LABEL: 'ld1r_4h_int_shuff' 313; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp = load i16, ptr %x, align 2 314; CODESIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %tmp1 = insertelement <4 x i16> undef, i16 %tmp, i16 0 315; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lane = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer 316; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i16> %lane 317; 318entry: 319 %tmp = load i16, ptr %x, align 2 320 %tmp1 = insertelement <4 x i16> undef, i16 %tmp, i16 0 321 %lane = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer 322 ret <4 x i16> %lane 323} 324 325define <8 x i16> @ld1r_8h_int_shuff(ptr nocapture %x) { 326; CHECK-LABEL: 'ld1r_8h_int_shuff' 327; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp = load i16, ptr %x, align 2 328; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %tmp1 = insertelement <8 x i16> 
undef, i16 %tmp, i16 0 329; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lane = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> zeroinitializer 330; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %lane 331; 332; CODESIZE-LABEL: 'ld1r_8h_int_shuff' 333; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp = load i16, ptr %x, align 2 334; CODESIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %tmp1 = insertelement <8 x i16> undef, i16 %tmp, i16 0 335; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lane = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> zeroinitializer 336; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %lane 337; 338entry: 339 %tmp = load i16, ptr %x, align 2 340 %tmp1 = insertelement <8 x i16> undef, i16 %tmp, i16 0 341 %lane = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> zeroinitializer 342 ret <8 x i16> %lane 343} 344 345define <2 x i32> @ld1r_2s_int_shuff(ptr nocapture %x) { 346; CHECK-LABEL: 'ld1r_2s_int_shuff' 347; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp = load i32, ptr %x, align 4 348; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %tmp1 = insertelement <2 x i32> undef, i32 %tmp, i32 0 349; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lane = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer 350; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i32> %lane 351; 352; CODESIZE-LABEL: 'ld1r_2s_int_shuff' 353; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp = load i32, ptr %x, align 4 354; CODESIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %tmp1 = insertelement <2 x i32> undef, i32 %tmp, i32 0 355; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %lane = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer 356; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %lane 357; 358entry: 359 %tmp = load i32, ptr %x, align 4 360 %tmp1 = insertelement <2 x i32> undef, i32 %tmp, i32 0 361 %lane = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer 362 ret <2 x i32> %lane 363} 364 365define <4 x i32> @ld1r_4s_int_shuff(ptr nocapture %x) { 366; CHECK-LABEL: 'ld1r_4s_int_shuff' 367; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp = load i32, ptr %x, align 4 368; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %tmp1 = insertelement <4 x i32> undef, i32 %tmp, i32 0 369; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lane = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer 370; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %lane 371; 372; CODESIZE-LABEL: 'ld1r_4s_int_shuff' 373; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp = load i32, ptr %x, align 4 374; CODESIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %tmp1 = insertelement <4 x i32> undef, i32 %tmp, i32 0 375; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lane = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer 376; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %lane 377; 378entry: 379 %tmp = load i32, ptr %x, align 4 380 %tmp1 = insertelement <4 x i32> undef, i32 %tmp, i32 0 381 %lane = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer 382 ret <4 x i32> %lane 383} 384 385define <2 x i64> @ld1r_2d_int_shuff(ptr nocapture %x) { 386; CHECK-LABEL: 'ld1r_2d_int_shuff' 387; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp = load i64, ptr %x, align 8 388; 
CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0 389; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lane = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <2 x i32> zeroinitializer 390; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %lane 391; 392; CODESIZE-LABEL: 'ld1r_2d_int_shuff' 393; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %tmp = load i64, ptr %x, align 8 394; CODESIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0 395; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %lane = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <2 x i32> zeroinitializer 396; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %lane 397; 398entry: 399 %tmp = load i64, ptr %x, align 8 400 %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0 401 %lane = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <2 x i32> zeroinitializer 402 ret <2 x i64> %lane 403} 404 405define void @vld2(ptr %p) { 406; CHECK-LABEL: 'vld2' 407; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8 = load <4 x i8>, ptr %p, align 4 408; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = shufflevector <4 x i8> %v4i8, <4 x i8> undef, <2 x i32> <i32 0, i32 2> 409; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_1 = shufflevector <4 x i8> %v4i8, <4 x i8> undef, <2 x i32> <i32 1, i32 3> 410; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = load <8 x i8>, ptr %p, align 8 411; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = shufflevector <8 x i8> %v8i8, <8 x i8> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> 412; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%v8i8_1 = shufflevector <8 x i8> %v8i8, <8 x i8> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> 413; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = load <16 x i8>, ptr %p, align 16 414; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = shufflevector <16 x i8> %v16i8, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 415; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_1 = shufflevector <16 x i8> %v16i8, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 416; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8 = load <32 x i8>, ptr %p, align 32 417; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = shufflevector <32 x i8> %v32i8, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> 418; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_1 = shufflevector <32 x i8> %v32i8, <32 x i8> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> 419; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = load <4 x i16>, ptr %p, align 8 420; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = shufflevector <4 x i16> %v4i16, <4 x i16> undef, <2 x i32> <i32 0, i32 2> 421; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_1 = shufflevector <4 x i16> %v4i16, <4 x i16> undef, <2 x i32> <i32 1, i32 3> 422; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = load <8 x i16>, ptr %p, align 16 423; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = shufflevector <8 x i16> %v8i16, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, 
i32 6> 424; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_1 = shufflevector <8 x i16> %v8i16, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> 425; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = load <16 x i16>, ptr %p, align 32 426; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = shufflevector <16 x i16> %v16i16, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 427; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_1 = shufflevector <16 x i16> %v16i16, <16 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 428; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i16 = load <32 x i16>, ptr %p, align 64 429; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_0 = shufflevector <32 x i16> %v32i16, <32 x i16> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> 430; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_1 = shufflevector <32 x i16> %v32i16, <32 x i16> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> 431; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = load <4 x i32>, ptr %p, align 16 432; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = shufflevector <4 x i32> %v4i32, <4 x i32> undef, <2 x i32> <i32 0, i32 2> 433; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_1 = shufflevector <4 x i32> %v4i32, <4 x i32> undef, <2 x i32> <i32 1, i32 3> 434; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32 = load <8 x i32>, ptr %p, align 32 435; CHECK-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %v8i32_0 = shufflevector <8 x i32> %v8i32, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> 436; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_1 = shufflevector <8 x i32> %v8i32, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> 437; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32 = load <16 x i32>, ptr %p, align 64 438; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_0 = shufflevector <16 x i32> %v16i32, <16 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 439; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_1 = shufflevector <16 x i32> %v16i32, <16 x i32> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 440; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i32 = load <32 x i32>, ptr %p, align 128 441; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i32_0 = shufflevector <32 x i32> %v32i32, <32 x i32> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> 442; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i32_1 = shufflevector <32 x i32> %v32i32, <32 x i32> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> 443; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64 = load <4 x i64>, ptr %p, align 32 444; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = shufflevector <4 x i64> %v2i64, <4 x i64> undef, <2 x i32> <i32 0, i32 2> 445; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = shufflevector <4 x i64> %v2i64, <4 x i64> undef, <2 x i32> <i32 1, i32 3> 446; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %v4i64 = load <8 x i64>, ptr %p, align 64 447; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_0 = shufflevector <8 x i64> %v4i64, <8 x i64> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> 448; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_1 = shufflevector <8 x i64> %v4i64, <8 x i64> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> 449; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i64 = load <16 x i64>, ptr %p, align 128 450; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64_0 = shufflevector <16 x i64> %v8i64, <16 x i64> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 451; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64_1 = shufflevector <16 x i64> %v8i64, <16 x i64> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 452; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i64 = load <32 x i64>, ptr %p, align 256 453; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i64_0 = shufflevector <32 x i64> %v16i64, <32 x i64> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> 454; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i64_1 = shufflevector <32 x i64> %v16i64, <32 x i64> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> 455; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void 456; 457; CODESIZE-LABEL: 'vld2' 458; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = load <4 x i8>, ptr %p, align 4 459; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = shufflevector <4 x i8> %v4i8, <4 x i8> undef, <2 x i32> <i32 0, i32 2> 
460; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_1 = shufflevector <4 x i8> %v4i8, <4 x i8> undef, <2 x i32> <i32 1, i32 3> 461; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = load <8 x i8>, ptr %p, align 8 462; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = shufflevector <8 x i8> %v8i8, <8 x i8> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> 463; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_1 = shufflevector <8 x i8> %v8i8, <8 x i8> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> 464; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = load <16 x i8>, ptr %p, align 16 465; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = shufflevector <16 x i8> %v16i8, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 466; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_1 = shufflevector <16 x i8> %v16i8, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 467; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8 = load <32 x i8>, ptr %p, align 32 468; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = shufflevector <32 x i8> %v32i8, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> 469; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_1 = shufflevector <32 x i8> %v32i8, <32 x i8> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> 470; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = load <4 x i16>, ptr %p, align 8 471; CODESIZE-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %v4i16_0 = shufflevector <4 x i16> %v4i16, <4 x i16> undef, <2 x i32> <i32 0, i32 2> 472; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_1 = shufflevector <4 x i16> %v4i16, <4 x i16> undef, <2 x i32> <i32 1, i32 3> 473; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = load <8 x i16>, ptr %p, align 16 474; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = shufflevector <8 x i16> %v8i16, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> 475; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_1 = shufflevector <8 x i16> %v8i16, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> 476; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = load <16 x i16>, ptr %p, align 32 477; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = shufflevector <16 x i16> %v16i16, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 478; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_1 = shufflevector <16 x i16> %v16i16, <16 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 479; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i16 = load <32 x i16>, ptr %p, align 64 480; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_0 = shufflevector <32 x i16> %v32i16, <32 x i16> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> 481; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_1 = shufflevector <32 x i16> %v32i16, <32 x i16> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> 482; CODESIZE-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %v4i32 = load <4 x i32>, ptr %p, align 16 483; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = shufflevector <4 x i32> %v4i32, <4 x i32> undef, <2 x i32> <i32 0, i32 2> 484; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_1 = shufflevector <4 x i32> %v4i32, <4 x i32> undef, <2 x i32> <i32 1, i32 3> 485; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32 = load <8 x i32>, ptr %p, align 32 486; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_0 = shufflevector <8 x i32> %v8i32, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> 487; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_1 = shufflevector <8 x i32> %v8i32, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> 488; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32 = load <16 x i32>, ptr %p, align 64 489; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_0 = shufflevector <16 x i32> %v16i32, <16 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 490; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_1 = shufflevector <16 x i32> %v16i32, <16 x i32> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 491; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i32 = load <32 x i32>, ptr %p, align 128 492; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i32_0 = shufflevector <32 x i32> %v32i32, <32 x i32> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> 493; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i32_1 = shufflevector <32 x i32> %v32i32, <32 x i32> undef, <16 x i32> <i32 1, i32 
3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> 494; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64 = load <4 x i64>, ptr %p, align 32 495; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = shufflevector <4 x i64> %v2i64, <4 x i64> undef, <2 x i32> <i32 0, i32 2> 496; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = shufflevector <4 x i64> %v2i64, <4 x i64> undef, <2 x i32> <i32 1, i32 3> 497; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i64 = load <8 x i64>, ptr %p, align 64 498; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_0 = shufflevector <8 x i64> %v4i64, <8 x i64> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> 499; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_1 = shufflevector <8 x i64> %v4i64, <8 x i64> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> 500; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i64 = load <16 x i64>, ptr %p, align 128 501; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64_0 = shufflevector <16 x i64> %v8i64, <16 x i64> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 502; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64_1 = shufflevector <16 x i64> %v8i64, <16 x i64> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 503; CODESIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i64 = load <32 x i64>, ptr %p, align 256 504; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i64_0 = shufflevector <32 x i64> %v16i64, <32 x i64> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> 505; CODESIZE-NEXT: Cost 
Model: Found an estimated cost of 8 for instruction: %v16i64_1 = shufflevector <32 x i64> %v16i64, <32 x i64> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> 506; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void 507; 508 %v4i8 = load <4 x i8>, ptr %p 509 %v4i8_0 = shufflevector <4 x i8> %v4i8, <4 x i8> undef, <2 x i32> <i32 0, i32 2> 510 %v4i8_1 = shufflevector <4 x i8> %v4i8, <4 x i8> undef, <2 x i32> <i32 1, i32 3> 511 %v8i8 = load <8 x i8>, ptr %p 512 %v8i8_0 = shufflevector <8 x i8> %v8i8, <8 x i8> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> 513 %v8i8_1 = shufflevector <8 x i8> %v8i8, <8 x i8> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> 514 %v16i8 = load <16 x i8>, ptr %p 515 %v16i8_0 = shufflevector <16 x i8> %v16i8, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 516 %v16i8_1 = shufflevector <16 x i8> %v16i8, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 517 %v32i8 = load <32 x i8>, ptr %p 518 %v32i8_0 = shufflevector <32 x i8> %v32i8, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> 519 %v32i8_1 = shufflevector <32 x i8> %v32i8, <32 x i8> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> 520 521 %v4i16 = load <4 x i16>, ptr %p 522 %v4i16_0 = shufflevector <4 x i16> %v4i16, <4 x i16> undef, <2 x i32> <i32 0, i32 2> 523 %v4i16_1 = shufflevector <4 x i16> %v4i16, <4 x i16> undef, <2 x i32> <i32 1, i32 3> 524 %v8i16 = load <8 x i16>, ptr %p 525 %v8i16_0 = shufflevector <8 x i16> %v8i16, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> 526 %v8i16_1 = shufflevector <8 x i16> %v8i16, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> 527 %v16i16 = load 
<16 x i16>, ptr %p 528 %v16i16_0 = shufflevector <16 x i16> %v16i16, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 529 %v16i16_1 = shufflevector <16 x i16> %v16i16, <16 x i16> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 530 %v32i16 = load <32 x i16>, ptr %p 531 %v32i16_0 = shufflevector <32 x i16> %v32i16, <32 x i16> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> 532 %v32i16_1 = shufflevector <32 x i16> %v32i16, <32 x i16> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> 533 534 %v4i32 = load <4 x i32>, ptr %p 535 %v4i32_0 = shufflevector <4 x i32> %v4i32, <4 x i32> undef, <2 x i32> <i32 0, i32 2> 536 %v4i32_1 = shufflevector <4 x i32> %v4i32, <4 x i32> undef, <2 x i32> <i32 1, i32 3> 537 %v8i32 = load <8 x i32>, ptr %p 538 %v8i32_0 = shufflevector <8 x i32> %v8i32, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> 539 %v8i32_1 = shufflevector <8 x i32> %v8i32, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> 540 %v16i32 = load <16 x i32>, ptr %p 541 %v16i32_0 = shufflevector <16 x i32> %v16i32, <16 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 542 %v16i32_1 = shufflevector <16 x i32> %v16i32, <16 x i32> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 543 %v32i32 = load <32 x i32>, ptr %p 544 %v32i32_0 = shufflevector <32 x i32> %v32i32, <32 x i32> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> 545 %v32i32_1 = shufflevector <32 x i32> %v32i32, <32 x i32> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> 546 547 %v2i64 = load <4 x i64>, ptr %p 
548 %v2i64_0 = shufflevector <4 x i64> %v2i64, <4 x i64> undef, <2 x i32> <i32 0, i32 2> 549 %v2i64_1 = shufflevector <4 x i64> %v2i64, <4 x i64> undef, <2 x i32> <i32 1, i32 3> 550 %v4i64 = load <8 x i64>, ptr %p 551 %v4i64_0 = shufflevector <8 x i64> %v4i64, <8 x i64> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> 552 %v4i64_1 = shufflevector <8 x i64> %v4i64, <8 x i64> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> 553 %v8i64 = load <16 x i64>, ptr %p 554 %v8i64_0 = shufflevector <16 x i64> %v8i64, <16 x i64> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 555 %v8i64_1 = shufflevector <16 x i64> %v8i64, <16 x i64> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 556 %v16i64 = load <32 x i64>, ptr %p 557 %v16i64_0 = shufflevector <32 x i64> %v16i64, <32 x i64> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> 558 %v16i64_1 = shufflevector <32 x i64> %v16i64, <32 x i64> undef, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> 559 560 ret void 561} 562 563 564define void @vld3(ptr %p) { 565; CHECK-LABEL: 'vld3' 566; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8 = load <6 x i8>, ptr %p, align 8 567; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i8_0 = shufflevector <6 x i8> %v2i8, <6 x i8> undef, <2 x i32> <i32 0, i32 3> 568; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i8_1 = shufflevector <6 x i8> %v2i8, <6 x i8> undef, <2 x i32> <i32 1, i32 4> 569; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i8_2 = shufflevector <6 x i8> %v2i8, <6 x i8> undef, <2 x i32> <i32 2, i32 5> 570; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = load <12 x i8>, ptr %p, align 16 571; CHECK-NEXT: Cost Model: Found an 
estimated cost of 8 for instruction: %v4i8_0 = shufflevector <12 x i8> %v4i8, <12 x i8> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9> 572; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i8_1 = shufflevector <12 x i8> %v4i8, <12 x i8> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10> 573; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i8_2 = shufflevector <12 x i8> %v4i8, <12 x i8> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11> 574; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8 = load <24 x i8>, ptr %p, align 32 575; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21> 576; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_1 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22> 577; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_2 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23> 578; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8 = load <48 x i8>, ptr %p, align 64 579; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = shufflevector <48 x i8> %v16i8, <48 x i8> undef, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45> 580; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_1 = shufflevector <48 x i8> %v16i8, <48 x i8> undef, <16 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22, i32 25, i32 28, i32 31, i32 34, i32 37, i32 40, i32 43, i32 46> 581; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_2 = shufflevector <48 x i8> %v16i8, <48 x i8> 
undef, <16 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23, i32 26, i32 29, i32 32, i32 35, i32 38, i32 41, i32 44, i32 47> 582; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = load <6 x i16>, ptr %p, align 16 583; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i16_0 = shufflevector <6 x i16> %v2i16, <6 x i16> undef, <2 x i32> <i32 0, i32 3> 584; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i16_1 = shufflevector <6 x i16> %v2i16, <6 x i16> undef, <2 x i32> <i32 1, i32 4> 585; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i16_2 = shufflevector <6 x i16> %v2i16, <6 x i16> undef, <2 x i32> <i32 2, i32 5> 586; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = load <12 x i16>, ptr %p, align 32 587; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = shufflevector <12 x i16> %v4i16, <12 x i16> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9> 588; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_1 = shufflevector <12 x i16> %v4i16, <12 x i16> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10> 589; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_2 = shufflevector <12 x i16> %v4i16, <12 x i16> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11> 590; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i16 = load <24 x i16>, ptr %p, align 64 591; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21> 592; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_1 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22> 593; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%v8i16_2 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23> 594; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16 = load <48 x i16>, ptr %p, align 128 595; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_0 = shufflevector <48 x i16> %v16i16, <48 x i16> undef, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45> 596; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_1 = shufflevector <48 x i16> %v16i16, <48 x i16> undef, <16 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22, i32 25, i32 28, i32 31, i32 34, i32 37, i32 40, i32 43, i32 46> 597; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_2 = shufflevector <48 x i16> %v16i16, <48 x i16> undef, <16 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23, i32 26, i32 29, i32 32, i32 35, i32 38, i32 41, i32 44, i32 47> 598; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = load <6 x i32>, ptr %p, align 32 599; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = shufflevector <6 x i32> %v2i32, <6 x i32> undef, <2 x i32> <i32 0, i32 3> 600; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = shufflevector <6 x i32> %v2i32, <6 x i32> undef, <2 x i32> <i32 1, i32 4> 601; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_2 = shufflevector <6 x i32> %v2i32, <6 x i32> undef, <2 x i32> <i32 2, i32 5> 602; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = load <12 x i32>, ptr %p, align 64 603; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9> 604; CHECK-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %v4i32_1 = shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10> 605; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_2 = shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11> 606; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = load <24 x i32>, ptr %p, align 128 607; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_0 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21> 608; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_1 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22> 609; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_2 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23> 610; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i32 = load <48 x i32>, ptr %p, align 256 611; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_0 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45> 612; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_1 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22, i32 25, i32 28, i32 31, i32 34, i32 37, i32 40, i32 43, i32 46> 613; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_2 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23, i32 26, i32 29, i32 32, i32 35, i32 
38, i32 41, i32 44, i32 47> 614; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = load <6 x i64>, ptr %p, align 64 615; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = shufflevector <6 x i64> %v2i64, <6 x i64> undef, <2 x i32> <i32 0, i32 3> 616; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = shufflevector <6 x i64> %v2i64, <6 x i64> undef, <2 x i32> <i32 1, i32 4> 617; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_2 = shufflevector <6 x i64> %v2i64, <6 x i64> undef, <2 x i32> <i32 2, i32 5> 618; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = load <12 x i64>, ptr %p, align 128 619; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_0 = shufflevector <12 x i64> %v4i64, <12 x i64> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9> 620; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_1 = shufflevector <12 x i64> %v4i64, <12 x i64> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10> 621; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_2 = shufflevector <12 x i64> %v4i64, <12 x i64> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11> 622; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i64 = load <24 x i64>, ptr %p, align 256 623; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64_0 = shufflevector <24 x i64> %v8i64, <24 x i64> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21> 624; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64_1 = shufflevector <24 x i64> %v8i64, <24 x i64> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22> 625; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64_2 = shufflevector <24 x i64> %v8i64, <24 x i64> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 
17, i32 20, i32 23> 626; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16i64 = load <48 x i64>, ptr %p, align 512 627; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i64_0 = shufflevector <48 x i64> %v16i64, <48 x i64> undef, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45> 628; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i64_1 = shufflevector <48 x i64> %v16i64, <48 x i64> undef, <16 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22, i32 25, i32 28, i32 31, i32 34, i32 37, i32 40, i32 43, i32 46> 629; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i64_2 = shufflevector <48 x i64> %v16i64, <48 x i64> undef, <16 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23, i32 26, i32 29, i32 32, i32 35, i32 38, i32 41, i32 44, i32 47> 630; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void 631; 632; CODESIZE-LABEL: 'vld3' 633; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8 = load <6 x i8>, ptr %p, align 8 634; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i8_0 = shufflevector <6 x i8> %v2i8, <6 x i8> undef, <2 x i32> <i32 0, i32 3> 635; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i8_1 = shufflevector <6 x i8> %v2i8, <6 x i8> undef, <2 x i32> <i32 1, i32 4> 636; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i8_2 = shufflevector <6 x i8> %v2i8, <6 x i8> undef, <2 x i32> <i32 2, i32 5> 637; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = load <12 x i8>, ptr %p, align 16 638; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i8_0 = shufflevector <12 x i8> %v4i8, <12 x i8> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9> 639; CODESIZE-NEXT: Cost 
Model: Found an estimated cost of 8 for instruction: %v4i8_1 = shufflevector <12 x i8> %v4i8, <12 x i8> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10> 640; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i8_2 = shufflevector <12 x i8> %v4i8, <12 x i8> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11> 641; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8 = load <24 x i8>, ptr %p, align 32 642; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21> 643; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_1 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22> 644; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_2 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23> 645; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8 = load <48 x i8>, ptr %p, align 64 646; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = shufflevector <48 x i8> %v16i8, <48 x i8> undef, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45> 647; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_1 = shufflevector <48 x i8> %v16i8, <48 x i8> undef, <16 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22, i32 25, i32 28, i32 31, i32 34, i32 37, i32 40, i32 43, i32 46> 648; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_2 = shufflevector <48 x i8> %v16i8, <48 x i8> undef, <16 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23, i32 26, i32 29, i32 32, i32 35, i32 38, i32 41, 
i32 44, i32 47> 649; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = load <6 x i16>, ptr %p, align 16 650; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i16_0 = shufflevector <6 x i16> %v2i16, <6 x i16> undef, <2 x i32> <i32 0, i32 3> 651; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i16_1 = shufflevector <6 x i16> %v2i16, <6 x i16> undef, <2 x i32> <i32 1, i32 4> 652; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i16_2 = shufflevector <6 x i16> %v2i16, <6 x i16> undef, <2 x i32> <i32 2, i32 5> 653; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = load <12 x i16>, ptr %p, align 32 654; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = shufflevector <12 x i16> %v4i16, <12 x i16> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9> 655; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_1 = shufflevector <12 x i16> %v4i16, <12 x i16> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10> 656; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_2 = shufflevector <12 x i16> %v4i16, <12 x i16> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11> 657; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i16 = load <24 x i16>, ptr %p, align 64 658; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21> 659; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_1 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22> 660; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_2 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> <i32 2, i32 5, i32 
8, i32 11, i32 14, i32 17, i32 20, i32 23> 661; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16 = load <48 x i16>, ptr %p, align 128 662; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_0 = shufflevector <48 x i16> %v16i16, <48 x i16> undef, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45> 663; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_1 = shufflevector <48 x i16> %v16i16, <48 x i16> undef, <16 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22, i32 25, i32 28, i32 31, i32 34, i32 37, i32 40, i32 43, i32 46> 664; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_2 = shufflevector <48 x i16> %v16i16, <48 x i16> undef, <16 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23, i32 26, i32 29, i32 32, i32 35, i32 38, i32 41, i32 44, i32 47> 665; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = load <6 x i32>, ptr %p, align 32 666; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = shufflevector <6 x i32> %v2i32, <6 x i32> undef, <2 x i32> <i32 0, i32 3> 667; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = shufflevector <6 x i32> %v2i32, <6 x i32> undef, <2 x i32> <i32 1, i32 4> 668; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_2 = shufflevector <6 x i32> %v2i32, <6 x i32> undef, <2 x i32> <i32 2, i32 5> 669; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = load <12 x i32>, ptr %p, align 64 670; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9> 671; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_1 = 
shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10> 672; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_2 = shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11> 673; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = load <24 x i32>, ptr %p, align 128 674; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_0 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21> 675; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_1 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22> 676; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_2 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23> 677; CODESIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i32 = load <48 x i32>, ptr %p, align 256 678; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_0 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45> 679; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_1 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22, i32 25, i32 28, i32 31, i32 34, i32 37, i32 40, i32 43, i32 46> 680; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_2 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23, i32 26, i32 29, i32 32, i32 35, i32 38, i32 41, i32 44, i32 47> 681; 
CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = load <6 x i64>, ptr %p, align 64 682; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = shufflevector <6 x i64> %v2i64, <6 x i64> undef, <2 x i32> <i32 0, i32 3> 683; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = shufflevector <6 x i64> %v2i64, <6 x i64> undef, <2 x i32> <i32 1, i32 4> 684; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_2 = shufflevector <6 x i64> %v2i64, <6 x i64> undef, <2 x i32> <i32 2, i32 5> 685; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = load <12 x i64>, ptr %p, align 128 686; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_0 = shufflevector <12 x i64> %v4i64, <12 x i64> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9> 687; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_1 = shufflevector <12 x i64> %v4i64, <12 x i64> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10> 688; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_2 = shufflevector <12 x i64> %v4i64, <12 x i64> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11> 689; CODESIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i64 = load <24 x i64>, ptr %p, align 256 690; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64_0 = shufflevector <24 x i64> %v8i64, <24 x i64> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21> 691; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64_1 = shufflevector <24 x i64> %v8i64, <24 x i64> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22> 692; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64_2 = shufflevector <24 x i64> %v8i64, <24 x i64> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, 
i32 17, i32 20, i32 23> 693; CODESIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16i64 = load <48 x i64>, ptr %p, align 512 694; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i64_0 = shufflevector <48 x i64> %v16i64, <48 x i64> undef, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45> 695; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i64_1 = shufflevector <48 x i64> %v16i64, <48 x i64> undef, <16 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22, i32 25, i32 28, i32 31, i32 34, i32 37, i32 40, i32 43, i32 46> 696; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i64_2 = shufflevector <48 x i64> %v16i64, <48 x i64> undef, <16 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23, i32 26, i32 29, i32 32, i32 35, i32 38, i32 41, i32 44, i32 47> 697; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void 698; 699 %v2i8 = load <6 x i8>, ptr %p 700 %v2i8_0 = shufflevector <6 x i8> %v2i8, <6 x i8> undef, <2 x i32> <i32 0, i32 3> 701 %v2i8_1 = shufflevector <6 x i8> %v2i8, <6 x i8> undef, <2 x i32> <i32 1, i32 4> 702 %v2i8_2 = shufflevector <6 x i8> %v2i8, <6 x i8> undef, <2 x i32> <i32 2, i32 5> 703 %v4i8 = load <12 x i8>, ptr %p 704 %v4i8_0 = shufflevector <12 x i8> %v4i8, <12 x i8> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9> 705 %v4i8_1 = shufflevector <12 x i8> %v4i8, <12 x i8> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10> 706 %v4i8_2 = shufflevector <12 x i8> %v4i8, <12 x i8> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11> 707 %v8i8 = load <24 x i8>, ptr %p 708 %v8i8_0 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21> 709 %v8i8_1 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, 
i32 22> 710 %v8i8_2 = shufflevector <24 x i8> %v8i8, <24 x i8> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23> 711 %v16i8 = load <48 x i8>, ptr %p 712 %v16i8_0 = shufflevector <48 x i8> %v16i8, <48 x i8> undef, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45> 713 %v16i8_1 = shufflevector <48 x i8> %v16i8, <48 x i8> undef, <16 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22, i32 25, i32 28, i32 31, i32 34, i32 37, i32 40, i32 43, i32 46> 714 %v16i8_2 = shufflevector <48 x i8> %v16i8, <48 x i8> undef, <16 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23, i32 26, i32 29, i32 32, i32 35, i32 38, i32 41, i32 44, i32 47> 715 716 %v2i16 = load <6 x i16>, ptr %p 717 %v2i16_0 = shufflevector <6 x i16> %v2i16, <6 x i16> undef, <2 x i32> <i32 0, i32 3> 718 %v2i16_1 = shufflevector <6 x i16> %v2i16, <6 x i16> undef, <2 x i32> <i32 1, i32 4> 719 %v2i16_2 = shufflevector <6 x i16> %v2i16, <6 x i16> undef, <2 x i32> <i32 2, i32 5> 720 %v4i16 = load <12 x i16>, ptr %p 721 %v4i16_0 = shufflevector <12 x i16> %v4i16, <12 x i16> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9> 722 %v4i16_1 = shufflevector <12 x i16> %v4i16, <12 x i16> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10> 723 %v4i16_2 = shufflevector <12 x i16> %v4i16, <12 x i16> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11> 724 %v8i16 = load <24 x i16>, ptr %p 725 %v8i16_0 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21> 726 %v8i16_1 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22> 727 %v8i16_2 = shufflevector <24 x i16> %v8i16, <24 x i16> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23> 728 %v16i16 = load <48 x i16>, ptr %p 729 %v16i16_0 = shufflevector <48 x i16> %v16i16, <48 x i16> undef, 
<16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45> 730 %v16i16_1 = shufflevector <48 x i16> %v16i16, <48 x i16> undef, <16 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22, i32 25, i32 28, i32 31, i32 34, i32 37, i32 40, i32 43, i32 46> 731 %v16i16_2 = shufflevector <48 x i16> %v16i16, <48 x i16> undef, <16 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23, i32 26, i32 29, i32 32, i32 35, i32 38, i32 41, i32 44, i32 47> 732 733 %v2i32 = load <6 x i32>, ptr %p 734 %v2i32_0 = shufflevector <6 x i32> %v2i32, <6 x i32> undef, <2 x i32> <i32 0, i32 3> 735 %v2i32_1 = shufflevector <6 x i32> %v2i32, <6 x i32> undef, <2 x i32> <i32 1, i32 4> 736 %v2i32_2 = shufflevector <6 x i32> %v2i32, <6 x i32> undef, <2 x i32> <i32 2, i32 5> 737 %v4i32 = load <12 x i32>, ptr %p 738 %v4i32_0 = shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9> 739 %v4i32_1 = shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10> 740 %v4i32_2 = shufflevector <12 x i32> %v4i32, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11> 741 %v8i32 = load <24 x i32>, ptr %p 742 %v8i32_0 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21> 743 %v8i32_1 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22> 744 %v8i32_2 = shufflevector <24 x i32> %v8i32, <24 x i32> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23> 745 %v16i32 = load <48 x i32>, ptr %p 746 %v16i32_0 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45> 747 %v16i32_1 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> <i32 1, i32 4, i32 7, 
i32 10, i32 13, i32 16, i32 19, i32 22, i32 25, i32 28, i32 31, i32 34, i32 37, i32 40, i32 43, i32 46> 748 %v16i32_2 = shufflevector <48 x i32> %v16i32, <48 x i32> undef, <16 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23, i32 26, i32 29, i32 32, i32 35, i32 38, i32 41, i32 44, i32 47> 749 750 %v2i64 = load <6 x i64>, ptr %p 751 %v2i64_0 = shufflevector <6 x i64> %v2i64, <6 x i64> undef, <2 x i32> <i32 0, i32 3> 752 %v2i64_1 = shufflevector <6 x i64> %v2i64, <6 x i64> undef, <2 x i32> <i32 1, i32 4> 753 %v2i64_2 = shufflevector <6 x i64> %v2i64, <6 x i64> undef, <2 x i32> <i32 2, i32 5> 754 %v4i64 = load <12 x i64>, ptr %p 755 %v4i64_0 = shufflevector <12 x i64> %v4i64, <12 x i64> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9> 756 %v4i64_1 = shufflevector <12 x i64> %v4i64, <12 x i64> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10> 757 %v4i64_2 = shufflevector <12 x i64> %v4i64, <12 x i64> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11> 758 %v8i64 = load <24 x i64>, ptr %p 759 %v8i64_0 = shufflevector <24 x i64> %v8i64, <24 x i64> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21> 760 %v8i64_1 = shufflevector <24 x i64> %v8i64, <24 x i64> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22> 761 %v8i64_2 = shufflevector <24 x i64> %v8i64, <24 x i64> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23> 762 %v16i64 = load <48 x i64>, ptr %p 763 %v16i64_0 = shufflevector <48 x i64> %v16i64, <48 x i64> undef, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45> 764 %v16i64_1 = shufflevector <48 x i64> %v16i64, <48 x i64> undef, <16 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22, i32 25, i32 28, i32 31, i32 34, i32 37, i32 40, i32 43, i32 46> 765 %v16i64_2 = shufflevector <48 x i64> %v16i64, <48 x i64> undef, <16 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, 
i32 23, i32 26, i32 29, i32 32, i32 35, i32 38, i32 41, i32 44, i32 47> 766 767 ret void 768} 769 770define void @vld4(ptr %p) { 771; CHECK-LABEL: 'vld4' 772; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8 = load <8 x i8>, ptr %p, align 8 773; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i8_0 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 0, i32 4> 774; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i8_1 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 1, i32 5> 775; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i8_2 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 2, i32 6> 776; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i8_3 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 3, i32 7> 777; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = load <16 x i8>, ptr %p, align 16 778; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i8_0 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12> 779; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i8_1 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13> 780; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i8_2 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14> 781; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i8_3 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15> 782; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8 = load <32 x i8>, ptr %p, align 32 783; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, 
i32 20, i32 24, i32 28> 784; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_1 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29> 785; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_2 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30> 786; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_3 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31> 787; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8 = load <64 x i8>, ptr %p, align 64 788; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60> 789; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_1 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61> 790; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_2 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62> 791; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_3 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63> 792; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = load <8 x i16>, ptr %p, align 16 793; CHECK-NEXT: Cost Model: Found 
an estimated cost of 8 for instruction: %v2i16_0 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 0, i32 4> 794; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i16_1 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 1, i32 5> 795; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i16_2 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 2, i32 6> 796; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i16_3 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 3, i32 7> 797; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = load <16 x i16>, ptr %p, align 32 798; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12> 799; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_1 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13> 800; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_2 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14> 801; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15> 802; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i16 = load <32 x i16>, ptr %p, align 64 803; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28> 804; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_1 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29> 805; CHECK-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %v8i16_2 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30> 806; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_3 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31> 807; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16 = load <64 x i16>, ptr %p, align 128 808; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_0 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60> 809; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_1 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61> 810; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_2 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62> 811; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_3 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63> 812; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = load <8 x i32>, ptr %p, align 32 813; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 0, i32 4> 814; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = shufflevector <8 x 
i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 1, i32 5> 815; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_2 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 2, i32 6> 816; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_3 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 3, i32 7> 817; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = load <16 x i32>, ptr %p, align 64 818; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12> 819; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_1 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13> 820; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_2 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14> 821; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15> 822; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = load <32 x i32>, ptr %p, align 128 823; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_0 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28> 824; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_1 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29> 825; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_2 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30> 826; CHECK-NEXT: Cost Model: Found an estimated 
cost of 2 for instruction: %v8i32_3 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31> 827; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i32 = load <64 x i32>, ptr %p, align 256 828; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_0 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60> 829; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_1 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61> 830; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_2 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62> 831; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_3 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63> 832; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = load <8 x i64>, ptr %p, align 64 833; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 0, i32 4> 834; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 1, i32 5> 835; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_2 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 2, i32 6> 836; 
CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_3 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 3, i32 7> 837; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = load <16 x i64>, ptr %p, align 128 838; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_0 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12> 839; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_1 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13> 840; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_2 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14> 841; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_3 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15> 842; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i64 = load <32 x i64>, ptr %p, align 256 843; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64_0 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28> 844; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64_1 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29> 845; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64_2 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30> 846; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64_3 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31> 847; CHECK-NEXT: Cost Model: Found an estimated cost of 
32 for instruction: %v16i64 = load <64 x i64>, ptr %p, align 512 848; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i64_0 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60> 849; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i64_1 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61> 850; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i64_2 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62> 851; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i64_3 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63> 852; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void 853; 854; CODESIZE-LABEL: 'vld4' 855; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8 = load <8 x i8>, ptr %p, align 8 856; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i8_0 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 0, i32 4> 857; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i8_1 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 1, i32 5> 858; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i8_2 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 2, i32 6> 859; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i8_3 = shufflevector <8 x i8> %v2i8, 
<8 x i8> undef, <2 x i32> <i32 3, i32 7> 860; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = load <16 x i8>, ptr %p, align 16 861; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i8_0 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12> 862; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i8_1 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13> 863; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i8_2 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14> 864; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i8_3 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15> 865; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8 = load <32 x i8>, ptr %p, align 32 866; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28> 867; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_1 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29> 868; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_2 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30> 869; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_3 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31> 870; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i8 = load <64 x i8>, ptr %p, align 64 871; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %v16i8_0 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60> 872; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_1 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61> 873; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_2 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62> 874; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_3 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63> 875; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = load <8 x i16>, ptr %p, align 16 876; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i16_0 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 0, i32 4> 877; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i16_1 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 1, i32 5> 878; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i16_2 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 2, i32 6> 879; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2i16_3 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 3, i32 7> 880; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16 = load <16 x i16>, ptr %p, align 32 881; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %v4i16_0 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12> 882; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_1 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13> 883; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_2 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14> 884; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15> 885; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i16 = load <32 x i16>, ptr %p, align 64 886; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28> 887; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_1 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29> 888; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_2 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30> 889; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_3 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31> 890; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i16 = load <64 x i16>, ptr %p, align 128 891; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_0 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, 
i32 48, i32 52, i32 56, i32 60> 892; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_1 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61> 893; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_2 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62> 894; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_3 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63> 895; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32 = load <8 x i32>, ptr %p, align 32 896; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_0 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 0, i32 4> 897; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_1 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 1, i32 5> 898; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_2 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 2, i32 6> 899; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_3 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 3, i32 7> 900; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i32 = load <16 x i32>, ptr %p, align 64 901; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_0 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12> 902; CODESIZE-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %v4i32_1 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13> 903; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_2 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14> 904; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_3 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15> 905; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i32 = load <32 x i32>, ptr %p, align 128 906; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_0 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28> 907; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_1 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29> 908; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_2 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30> 909; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_3 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31> 910; CODESIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16i32 = load <64 x i32>, ptr %p, align 256 911; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_0 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60> 912; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_1 = shufflevector <64 x i32> %v16i32, <64 x i32> 
undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61> 913; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_2 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62> 914; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32_3 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63> 915; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2i64 = load <8 x i64>, ptr %p, align 64 916; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_0 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 0, i32 4> 917; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_1 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 1, i32 5> 918; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_2 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 2, i32 6> 919; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_3 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 3, i32 7> 920; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4i64 = load <16 x i64>, ptr %p, align 128 921; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_0 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12> 922; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_1 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13> 923; CODESIZE-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %v4i64_2 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14> 924; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_3 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15> 925; CODESIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8i64 = load <32 x i64>, ptr %p, align 256 926; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64_0 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28> 927; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64_1 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29> 928; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64_2 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30> 929; CODESIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64_3 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31> 930; CODESIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16i64 = load <64 x i64>, ptr %p, align 512 931; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i64_0 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60> 932; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i64_1 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61> 933; CODESIZE-NEXT: 
Cost Model: Found an estimated cost of 8 for instruction: %v16i64_2 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62> 934; CODESIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i64_3 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63> 935; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void 936; 937 %v2i8 = load <8 x i8>, ptr %p 938 %v2i8_0 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 0, i32 4> 939 %v2i8_1 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 1, i32 5> 940 %v2i8_2 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 2, i32 6> 941 %v2i8_3 = shufflevector <8 x i8> %v2i8, <8 x i8> undef, <2 x i32> <i32 3, i32 7> 942 %v4i8 = load <16 x i8>, ptr %p 943 %v4i8_0 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12> 944 %v4i8_1 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13> 945 %v4i8_2 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14> 946 %v4i8_3 = shufflevector <16 x i8> %v4i8, <16 x i8> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15> 947 %v8i8 = load <32 x i8>, ptr %p 948 %v8i8_0 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28> 949 %v8i8_1 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29> 950 %v8i8_2 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30> 951 %v8i8_3 = shufflevector <32 x i8> %v8i8, <32 x i8> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, 
i32 23, i32 27, i32 31> 952 %v16i8 = load <64 x i8>, ptr %p 953 %v16i8_0 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60> 954 %v16i8_1 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61> 955 %v16i8_2 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62> 956 %v16i8_3 = shufflevector <64 x i8> %v16i8, <64 x i8> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63> 957 958 %v2i16 = load <8 x i16>, ptr %p 959 %v2i16_0 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 0, i32 4> 960 %v2i16_1 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 1, i32 5> 961 %v2i16_2 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 2, i32 6> 962 %v2i16_3 = shufflevector <8 x i16> %v2i16, <8 x i16> undef, <2 x i32> <i32 3, i32 7> 963 %v4i16 = load <16 x i16>, ptr %p 964 %v4i16_0 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12> 965 %v4i16_1 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13> 966 %v4i16_2 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14> 967 %v4i16_3 = shufflevector <16 x i16> %v4i16, <16 x i16> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15> 968 %v8i16 = load <32 x i16>, ptr %p 969 %v8i16_0 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28> 970 %v8i16_1 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 1, i32 5, i32 9, 
i32 13, i32 17, i32 21, i32 25, i32 29> 971 %v8i16_2 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30> 972 %v8i16_3 = shufflevector <32 x i16> %v8i16, <32 x i16> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31> 973 %v16i16 = load <64 x i16>, ptr %p 974 %v16i16_0 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60> 975 %v16i16_1 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61> 976 %v16i16_2 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62> 977 %v16i16_3 = shufflevector <64 x i16> %v16i16, <64 x i16> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63> 978 979 %v2i32 = load <8 x i32>, ptr %p 980 %v2i32_0 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 0, i32 4> 981 %v2i32_1 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 1, i32 5> 982 %v2i32_2 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 2, i32 6> 983 %v2i32_3 = shufflevector <8 x i32> %v2i32, <8 x i32> undef, <2 x i32> <i32 3, i32 7> 984 %v4i32 = load <16 x i32>, ptr %p 985 %v4i32_0 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12> 986 %v4i32_1 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13> 987 %v4i32_2 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14> 988 %v4i32_3 = shufflevector <16 x i32> %v4i32, <16 x i32> undef, <4 
x i32> <i32 3, i32 7, i32 11, i32 15> 989 %v8i32 = load <32 x i32>, ptr %p 990 %v8i32_0 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28> 991 %v8i32_1 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29> 992 %v8i32_2 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30> 993 %v8i32_3 = shufflevector <32 x i32> %v8i32, <32 x i32> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31> 994 %v16i32 = load <64 x i32>, ptr %p 995 %v16i32_0 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60> 996 %v16i32_1 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61> 997 %v16i32_2 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62> 998 %v16i32_3 = shufflevector <64 x i32> %v16i32, <64 x i32> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63> 999 1000 %v2i64 = load <8 x i64>, ptr %p 1001 %v2i64_0 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 0, i32 4> 1002 %v2i64_1 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 1, i32 5> 1003 %v2i64_2 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 2, i32 6> 1004 %v2i64_3 = shufflevector <8 x i64> %v2i64, <8 x i64> undef, <2 x i32> <i32 3, i32 7> 1005 %v4i64 = load <16 x i64>, ptr %p 1006 %v4i64_0 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x 
i32> <i32 0, i32 4, i32 8, i32 12> 1007 %v4i64_1 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13> 1008 %v4i64_2 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14> 1009 %v4i64_3 = shufflevector <16 x i64> %v4i64, <16 x i64> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15> 1010 %v8i64 = load <32 x i64>, ptr %p 1011 %v8i64_0 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28> 1012 %v8i64_1 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29> 1013 %v8i64_2 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30> 1014 %v8i64_3 = shufflevector <32 x i64> %v8i64, <32 x i64> undef, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31> 1015 %v16i64 = load <64 x i64>, ptr %p 1016 %v16i64_0 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 32, i32 36, i32 40, i32 44, i32 48, i32 52, i32 56, i32 60> 1017 %v16i64_1 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 61> 1018 %v16i64_2 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 34, i32 38, i32 42, i32 46, i32 50, i32 54, i32 58, i32 62> 1019 %v16i64_3 = shufflevector <64 x i64> %v16i64, <64 x i64> undef, <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31, i32 35, i32 39, i32 43, i32 47, i32 51, i32 55, i32 59, i32 63> 1020 1021 ret void 1022} 1023 1024