; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SLM
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+avx512bw -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX

; Every function below reads N consecutive narrow integers starting at %p0
; with scalar loads, widens each one with a scalar zext, and assembles the
; results through an insertelement chain. For all check prefixes used in this
; file the expected output is one vector load followed by one vector zext.

;
; vXi8
;

; Two adjacent i8 loads, each zero-extended to i64, packed into <2 x i64>.
define <2 x i64> @loadext_2i8_to_2i64(ptr %p0) {
; SSE2-LABEL: @loadext_2i8_to_2i64(
; SSE2-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
; SSE2-NEXT:    [[V1:%.*]] = zext <2 x i8> [[TMP1]] to <2 x i64>
; SSE2-NEXT:    ret <2 x i64> [[V1]]
;
; SLM-LABEL: @loadext_2i8_to_2i64(
; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
; SLM-NEXT:    [[TMP3:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i64>
; SLM-NEXT:    ret <2 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_2i8_to_2i64(
; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i64>
; AVX-NEXT:    ret <2 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
  %i0 = load i8, ptr %p0, align 1
  %i1 = load i8, ptr %p1, align 1
  %x0 = zext i8 %i0 to i64
  %x1 = zext i8 %i1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <2 x i64> %v0, i64 %x1, i32 1
  ret <2 x i64> %v1
}

; Four adjacent i8 loads, each zero-extended to i32, packed into <4 x i32>.
define <4 x i32> @loadext_4i8_to_4i32(ptr %p0) {
; SSE2-LABEL: @loadext_4i8_to_4i32(
; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
; SSE2-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
; SSE2-NEXT:    ret <4 x i32> [[TMP3]]
;
; SLM-LABEL: @loadext_4i8_to_4i32(
; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
; SLM-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
; SLM-NEXT:    ret <4 x i32> [[TMP3]]
;
; AVX-LABEL: @loadext_4i8_to_4i32(
; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
; AVX-NEXT:    ret <4 x i32> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
  %i0 = load i8, ptr %p0, align 1
  %i1 = load i8, ptr %p1, align 1
  %i2 = load i8, ptr %p2, align 1
  %i3 = load i8, ptr %p3, align 1
  %x0 = zext i8 %i0 to i32
  %x1 = zext i8 %i1 to i32
  %x2 = zext i8 %i2 to i32
  %x3 = zext i8 %i3 to i32
  %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %x1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %x2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %x3, i32 3
  ret <4 x i32> %v3
}

; Four adjacent i8 loads, each zero-extended to i64, packed into <4 x i64>.
define <4 x i64> @loadext_4i8_to_4i64(ptr %p0) {
; SSE2-LABEL: @loadext_4i8_to_4i64(
; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
; SSE2-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
; SSE2-NEXT:    ret <4 x i64> [[TMP3]]
;
; SLM-LABEL: @loadext_4i8_to_4i64(
; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
; SLM-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
; SLM-NEXT:    ret <4 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_4i8_to_4i64(
; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i64>
; AVX-NEXT:    ret <4 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
  %i0 = load i8, ptr %p0, align 1
  %i1 = load i8, ptr %p1, align 1
  %i2 = load i8, ptr %p2, align 1
  %i3 = load i8, ptr %p3, align 1
  %x0 = zext i8 %i0 to i64
  %x1 = zext i8 %i1 to i64
  %x2 = zext i8 %i2 to i64
  %x3 = zext i8 %i3 to i64
  %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <4 x i64> %v0, i64 %x1, i32 1
  %v2 = insertelement <4 x i64> %v1, i64 %x2, i32 2
  %v3 = insertelement <4 x i64> %v2, i64 %x3, i32 3
  ret <4 x i64> %v3
}

; Eight adjacent i8 loads, each zero-extended to i16, packed into <8 x i16>.
define <8 x i16> @loadext_8i8_to_8i16(ptr %p0) {
; SSE2-LABEL: @loadext_8i8_to_8i16(
; SSE2-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
; SSE2-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
; SSE2-NEXT:    ret <8 x i16> [[TMP3]]
;
; SLM-LABEL: @loadext_8i8_to_8i16(
; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
; SLM-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
; SLM-NEXT:    ret <8 x i16> [[TMP3]]
;
; AVX-LABEL: @loadext_8i8_to_8i16(
; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
; AVX-NEXT:    ret <8 x i16> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
  %p4 = getelementptr inbounds i8, ptr %p0, i64 4
  %p5 = getelementptr inbounds i8, ptr %p0, i64 5
  %p6 = getelementptr inbounds i8, ptr %p0, i64 6
  %p7 = getelementptr inbounds i8, ptr %p0, i64 7
  %i0 = load i8, ptr %p0, align 1
  %i1 = load i8, ptr %p1, align 1
  %i2 = load i8, ptr %p2, align 1
  %i3 = load i8, ptr %p3, align 1
  %i4 = load i8, ptr %p4, align 1
  %i5 = load i8, ptr %p5, align 1
  %i6 = load i8, ptr %p6, align 1
  %i7 = load i8, ptr %p7, align 1
  %x0 = zext i8 %i0 to i16
  %x1 = zext i8 %i1 to i16
  %x2 = zext i8 %i2 to i16
  %x3 = zext i8 %i3 to i16
  %x4 = zext i8 %i4 to i16
  %x5 = zext i8 %i5 to i16
  %x6 = zext i8 %i6 to i16
  %x7 = zext i8 %i7 to i16
  %v0 = insertelement <8 x i16> undef, i16 %x0, i32 0
  %v1 = insertelement <8 x i16> %v0, i16 %x1, i32 1
  %v2 = insertelement <8 x i16> %v1, i16 %x2, i32 2
  %v3 = insertelement <8 x i16> %v2, i16 %x3, i32 3
  %v4 = insertelement <8 x i16> %v3, i16 %x4, i32 4
  %v5 = insertelement <8 x i16> %v4, i16 %x5, i32 5
  %v6 = insertelement <8 x i16> %v5, i16 %x6, i32 6
  %v7 = insertelement <8 x i16> %v6, i16 %x7, i32 7
  ret <8 x i16> %v7
}

; Eight adjacent i8 loads, each zero-extended to i32, packed into <8 x i32>.
define <8 x i32> @loadext_8i8_to_8i32(ptr %p0) {
; SSE2-LABEL: @loadext_8i8_to_8i32(
; SSE2-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
; SSE2-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32>
; SSE2-NEXT:    ret <8 x i32> [[TMP3]]
;
; SLM-LABEL: @loadext_8i8_to_8i32(
; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
; SLM-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32>
; SLM-NEXT:    ret <8 x i32> [[TMP3]]
;
; AVX-LABEL: @loadext_8i8_to_8i32(
; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i32>
; AVX-NEXT:    ret <8 x i32> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
  %p4 = getelementptr inbounds i8, ptr %p0, i64 4
  %p5 = getelementptr inbounds i8, ptr %p0, i64 5
  %p6 = getelementptr inbounds i8, ptr %p0, i64 6
  %p7 = getelementptr inbounds i8, ptr %p0, i64 7
  %i0 = load i8, ptr %p0, align 1
  %i1 = load i8, ptr %p1, align 1
  %i2 = load i8, ptr %p2, align 1
  %i3 = load i8, ptr %p3, align 1
  %i4 = load i8, ptr %p4, align 1
  %i5 = load i8, ptr %p5, align 1
  %i6 = load i8, ptr %p6, align 1
  %i7 = load i8, ptr %p7, align 1
  %x0 = zext i8 %i0 to i32
  %x1 = zext i8 %i1 to i32
  %x2 = zext i8 %i2 to i32
  %x3 = zext i8 %i3 to i32
  %x4 = zext i8 %i4 to i32
  %x5 = zext i8 %i5 to i32
  %x6 = zext i8 %i6 to i32
  %x7 = zext i8 %i7 to i32
  %v0 = insertelement <8 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <8 x i32> %v0, i32 %x1, i32 1
  %v2 = insertelement <8 x i32> %v1, i32 %x2, i32 2
  %v3 = insertelement <8 x i32> %v2, i32 %x3, i32 3
  %v4 = insertelement <8 x i32> %v3, i32 %x4, i32 4
  %v5 = insertelement <8 x i32> %v4, i32 %x5, i32 5
  %v6 = insertelement <8 x i32> %v5, i32 %x6, i32 6
  %v7 = insertelement <8 x i32> %v6, i32 %x7, i32 7
  ret <8 x i32> %v7
}

; Sixteen adjacent i8 loads, each zero-extended to i16, packed into <16 x i16>.
define <16 x i16> @loadext_16i8_to_16i16(ptr %p0) {
; SSE2-LABEL: @loadext_16i8_to_16i16(
; SSE2-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[P0:%.*]], align 1
; SSE2-NEXT:    [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16>
; SSE2-NEXT:    ret <16 x i16> [[TMP3]]
;
; SLM-LABEL: @loadext_16i8_to_16i16(
; SLM-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[P0:%.*]], align 1
; SLM-NEXT:    [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16>
; SLM-NEXT:    ret <16 x i16> [[TMP3]]
;
; AVX-LABEL: @loadext_16i8_to_16i16(
; AVX-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i16>
; AVX-NEXT:    ret <16 x i16> [[TMP3]]
;
  %p1 = getelementptr inbounds i8, ptr %p0, i64 1
  %p2 = getelementptr inbounds i8, ptr %p0, i64 2
  %p3 = getelementptr inbounds i8, ptr %p0, i64 3
  %p4 = getelementptr inbounds i8, ptr %p0, i64 4
  %p5 = getelementptr inbounds i8, ptr %p0, i64 5
  %p6 = getelementptr inbounds i8, ptr %p0, i64 6
  %p7 = getelementptr inbounds i8, ptr %p0, i64 7
  %p8 = getelementptr inbounds i8, ptr %p0, i64 8
  %p9 = getelementptr inbounds i8, ptr %p0, i64 9
  %p10 = getelementptr inbounds i8, ptr %p0, i64 10
  %p11 = getelementptr inbounds i8, ptr %p0, i64 11
  %p12 = getelementptr inbounds i8, ptr %p0, i64 12
  %p13 = getelementptr inbounds i8, ptr %p0, i64 13
  %p14 = getelementptr inbounds i8, ptr %p0, i64 14
  %p15 = getelementptr inbounds i8, ptr %p0, i64 15
  %i0 = load i8, ptr %p0, align 1
  %i1 = load i8, ptr %p1, align 1
  %i2 = load i8, ptr %p2, align 1
  %i3 = load i8, ptr %p3, align 1
  %i4 = load i8, ptr %p4, align 1
  %i5 = load i8, ptr %p5, align 1
  %i6 = load i8, ptr %p6, align 1
  %i7 = load i8, ptr %p7, align 1
  %i8 = load i8, ptr %p8, align 1
  %i9 = load i8, ptr %p9, align 1
  %i10 = load i8, ptr %p10, align 1
  %i11 = load i8, ptr %p11, align 1
  %i12 = load i8, ptr %p12, align 1
  %i13 = load i8, ptr %p13, align 1
  %i14 = load i8, ptr %p14, align 1
  %i15 = load i8, ptr %p15, align 1
  %x0 = zext i8 %i0 to i16
  %x1 = zext i8 %i1 to i16
  %x2 = zext i8 %i2 to i16
  %x3 = zext i8 %i3 to i16
  %x4 = zext i8 %i4 to i16
  %x5 = zext i8 %i5 to i16
  %x6 = zext i8 %i6 to i16
  %x7 = zext i8 %i7 to i16
  %x8 = zext i8 %i8 to i16
  %x9 = zext i8 %i9 to i16
  %x10 = zext i8 %i10 to i16
  %x11 = zext i8 %i11 to i16
  %x12 = zext i8 %i12 to i16
  %x13 = zext i8 %i13 to i16
  %x14 = zext i8 %i14 to i16
  %x15 = zext i8 %i15 to i16
  %v0 = insertelement <16 x i16> undef, i16 %x0, i32 0
  %v1 = insertelement <16 x i16> %v0, i16 %x1, i32 1
  %v2 = insertelement <16 x i16> %v1, i16 %x2, i32 2
  %v3 = insertelement <16 x i16> %v2, i16 %x3, i32 3
  %v4 = insertelement <16 x i16> %v3, i16 %x4, i32 4
  %v5 = insertelement <16 x i16> %v4, i16 %x5, i32 5
  %v6 = insertelement <16 x i16> %v5, i16 %x6, i32 6
  %v7 = insertelement <16 x i16> %v6, i16 %x7, i32 7
  %v8 = insertelement <16 x i16> %v7, i16 %x8, i32 8
  %v9 = insertelement <16 x i16> %v8, i16 %x9, i32 9
  %v10 = insertelement <16 x i16> %v9, i16 %x10, i32 10
  %v11 = insertelement <16 x i16> %v10, i16 %x11, i32 11
  %v12 = insertelement <16 x i16> %v11, i16 %x12, i32 12
  %v13 = insertelement <16 x i16> %v12, i16 %x13, i32 13
  %v14 = insertelement <16 x i16> %v13, i16 %x14, i32 14
  %v15 = insertelement <16 x i16> %v14, i16 %x15, i32 15
  ret <16 x i16> %v15
}

;
; vXi16
;

; Two adjacent i16 loads, each zero-extended to i64, packed into <2 x i64>.
define <2 x i64> @loadext_2i16_to_2i64(ptr %p0) {
; SSE2-LABEL: @loadext_2i16_to_2i64(
; SSE2-NEXT:    [[TMP2:%.*]] = load <2 x i16>, ptr [[P0:%.*]], align 1
; SSE2-NEXT:    [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64>
; SSE2-NEXT:    ret <2 x i64> [[TMP3]]
;
; SLM-LABEL: @loadext_2i16_to_2i64(
; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i16>, ptr [[P0:%.*]], align 1
; SLM-NEXT:    [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64>
; SLM-NEXT:    ret <2 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_2i16_to_2i64(
; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i16>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i64>
; AVX-NEXT:    ret <2 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
  %i0 = load i16, ptr %p0, align 1
  %i1 = load i16, ptr %p1, align 1
  %x0 = zext i16 %i0 to i64
  %x1 = zext i16 %i1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <2 x i64> %v0, i64 %x1, i32 1
  ret <2 x i64> %v1
}

; Four adjacent i16 loads, each zero-extended to i32, packed into <4 x i32>.
define <4 x i32> @loadext_4i16_to_4i32(ptr %p0) {
; SSE2-LABEL: @loadext_4i16_to_4i32(
; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
; SSE2-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
; SSE2-NEXT:    ret <4 x i32> [[TMP3]]
;
; SLM-LABEL: @loadext_4i16_to_4i32(
; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
; SLM-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
; SLM-NEXT:    ret <4 x i32> [[TMP3]]
;
; AVX-LABEL: @loadext_4i16_to_4i32(
; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
; AVX-NEXT:    ret <4 x i32> [[TMP3]]
;
  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
  %p2 = getelementptr inbounds i16, ptr %p0, i64 2
  %p3 = getelementptr inbounds i16, ptr %p0, i64 3
  %i0 = load i16, ptr %p0, align 1
  %i1 = load i16, ptr %p1, align 1
  %i2 = load i16, ptr %p2, align 1
  %i3 = load i16, ptr %p3, align 1
  %x0 = zext i16 %i0 to i32
  %x1 = zext i16 %i1 to i32
  %x2 = zext i16 %i2 to i32
  %x3 = zext i16 %i3 to i32
  %v0 = insertelement <4 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %x1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %x2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %x3, i32 3
  ret <4 x i32> %v3
}

; Four adjacent i16 loads, each zero-extended to i64, packed into <4 x i64>.
define <4 x i64> @loadext_4i16_to_4i64(ptr %p0) {
; SSE2-LABEL: @loadext_4i16_to_4i64(
; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
; SSE2-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
; SSE2-NEXT:    ret <4 x i64> [[TMP3]]
;
; SLM-LABEL: @loadext_4i16_to_4i64(
; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
; SLM-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
; SLM-NEXT:    ret <4 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_4i16_to_4i64(
; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i64>
; AVX-NEXT:    ret <4 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
  %p2 = getelementptr inbounds i16, ptr %p0, i64 2
  %p3 = getelementptr inbounds i16, ptr %p0, i64 3
  %i0 = load i16, ptr %p0, align 1
  %i1 = load i16, ptr %p1, align 1
  %i2 = load i16, ptr %p2, align 1
  %i3 = load i16, ptr %p3, align 1
  %x0 = zext i16 %i0 to i64
  %x1 = zext i16 %i1 to i64
  %x2 = zext i16 %i2 to i64
  %x3 = zext i16 %i3 to i64
  %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <4 x i64> %v0, i64 %x1, i32 1
  %v2 = insertelement <4 x i64> %v1, i64 %x2, i32 2
  %v3 = insertelement <4 x i64> %v2, i64 %x3, i32 3
  ret <4 x i64> %v3
}

; Eight adjacent i16 loads, each zero-extended to i32, packed into <8 x i32>.
define <8 x i32> @loadext_8i16_to_8i32(ptr %p0) {
; SSE2-LABEL: @loadext_8i16_to_8i32(
; SSE2-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[P0:%.*]], align 1
; SSE2-NEXT:    [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
; SSE2-NEXT:    ret <8 x i32> [[TMP3]]
;
; SLM-LABEL: @loadext_8i16_to_8i32(
; SLM-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[P0:%.*]], align 1
; SLM-NEXT:    [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
; SLM-NEXT:    ret <8 x i32> [[TMP3]]
;
; AVX-LABEL: @loadext_8i16_to_8i32(
; AVX-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
; AVX-NEXT:    ret <8 x i32> [[TMP3]]
;
  %p1 = getelementptr inbounds i16, ptr %p0, i64 1
  %p2 = getelementptr inbounds i16, ptr %p0, i64 2
  %p3 = getelementptr inbounds i16, ptr %p0, i64 3
  %p4 = getelementptr inbounds i16, ptr %p0, i64 4
  %p5 = getelementptr inbounds i16, ptr %p0, i64 5
  %p6 = getelementptr inbounds i16, ptr %p0, i64 6
  %p7 = getelementptr inbounds i16, ptr %p0, i64 7
  %i0 = load i16, ptr %p0, align 1
  %i1 = load i16, ptr %p1, align 1
  %i2 = load i16, ptr %p2, align 1
  %i3 = load i16, ptr %p3, align 1
  %i4 = load i16, ptr %p4, align 1
  %i5 = load i16, ptr %p5, align 1
  %i6 = load i16, ptr %p6, align 1
  %i7 = load i16, ptr %p7, align 1
  %x0 = zext i16 %i0 to i32
  %x1 = zext i16 %i1 to i32
  %x2 = zext i16 %i2 to i32
  %x3 = zext i16 %i3 to i32
  %x4 = zext i16 %i4 to i32
  %x5 = zext i16 %i5 to i32
  %x6 = zext i16 %i6 to i32
  %x7 = zext i16 %i7 to i32
  %v0 = insertelement <8 x i32> undef, i32 %x0, i32 0
  %v1 = insertelement <8 x i32> %v0, i32 %x1, i32 1
  %v2 = insertelement <8 x i32> %v1, i32 %x2, i32 2
  %v3 = insertelement <8 x i32> %v2, i32 %x3, i32 3
  %v4 = insertelement <8 x i32> %v3, i32 %x4, i32 4
  %v5 = insertelement <8 x i32> %v4, i32 %x5, i32 5
  %v6 = insertelement <8 x i32> %v5, i32 %x6, i32 6
  %v7 = insertelement <8 x i32> %v6, i32 %x7, i32 7
  ret <8 x i32> %v7
}

;
; vXi32
;

; Two adjacent i32 loads, each zero-extended to i64, packed into <2 x i64>.
define <2 x i64> @loadext_2i32_to_2i64(ptr %p0) {
; SSE2-LABEL: @loadext_2i32_to_2i64(
; SSE2-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[P0:%.*]], align 1
; SSE2-NEXT:    [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
; SSE2-NEXT:    ret <2 x i64> [[TMP3]]
;
; SLM-LABEL: @loadext_2i32_to_2i64(
; SLM-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[P0:%.*]], align 1
; SLM-NEXT:    [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
; SLM-NEXT:    ret <2 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_2i32_to_2i64(
; AVX-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
; AVX-NEXT:    ret <2 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i32, ptr %p0, i64 1
  %i0 = load i32, ptr %p0, align 1
  %i1 = load i32, ptr %p1, align 1
  %x0 = zext i32 %i0 to i64
  %x1 = zext i32 %i1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <2 x i64> %v0, i64 %x1, i32 1
  ret <2 x i64> %v1
}

; Four adjacent i32 loads, each zero-extended to i64, packed into <4 x i64>.
define <4 x i64> @loadext_4i32_to_4i64(ptr %p0) {
; SSE2-LABEL: @loadext_4i32_to_4i64(
; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[P0:%.*]], align 1
; SSE2-NEXT:    [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64>
; SSE2-NEXT:    ret <4 x i64> [[TMP3]]
;
; SLM-LABEL: @loadext_4i32_to_4i64(
; SLM-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[P0:%.*]], align 1
; SLM-NEXT:    [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64>
; SLM-NEXT:    ret <4 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_4i32_to_4i64(
; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[P0:%.*]], align 1
; AVX-NEXT:    [[TMP3:%.*]] = zext <4 x i32> [[TMP2]] to <4 x i64>
; AVX-NEXT:    ret <4 x i64> [[TMP3]]
;
  %p1 = getelementptr inbounds i32, ptr %p0, i64 1
  %p2 = getelementptr inbounds i32, ptr %p0, i64 2
  %p3 = getelementptr inbounds i32, ptr %p0, i64 3
  %i0 = load i32, ptr %p0, align 1
  %i1 = load i32, ptr %p1, align 1
  %i2 = load i32, ptr %p2, align 1
  %i3 = load i32, ptr %p3, align 1
  %x0 = zext i32 %i0 to i64
  %x1 = zext i32 %i1 to i64
  %x2 = zext i32 %i2 to i64
  %x3 = zext i32 %i3 to i64
  %v0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %v1 = insertelement <4 x i64> %v0, i64 %x1, i32 1
  %v2 = insertelement <4 x i64> %v1, i64 %x2, i32 2
  %v3 = insertelement <4 x i64> %v2, i64 %x3, i32 3
  ret <4 x i64> %v3
}