; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 --data-layout="e" | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 --data-layout="e" | FileCheck %s --check-prefixes=CHECK,AVX
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 --data-layout="E" | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 --data-layout="E" | FileCheck %s --check-prefixes=CHECK,AVX

; The four invocations above run the vector-combine pass for sse2 and avx2,
; each with a little-endian ("e") and a big-endian ("E") data layout.

;-------------------------------------------------------------------------------
; Here we know we can load 128 bits as per dereferenceability and alignment.

; We don't widen scalar loads per-se.
; The expected output is the input load, unchanged.
; NOTE(review): despite its name this function loads <1 x float>, and its body
; is the same as @vec_with_1elt -- confirm whether a plain float load was meant.
define <1 x float> @scalar(ptr align 16 dereferenceable(16) %p) {
; CHECK-LABEL: @scalar(
; CHECK-NEXT:    [[R:%.*]] = load <1 x float>, ptr [[P:%.*]], align 16
; CHECK-NEXT:    ret <1 x float> [[R]]
;
  %r = load <1 x float>, ptr %p, align 16
  ret <1 x float> %r
}

; We don't widen single-element loads, these get scalarized.
define <1 x float> @vec_with_1elt(ptr align 16 dereferenceable(16) %p) {
; CHECK-LABEL: @vec_with_1elt(
; CHECK-NEXT:    [[R:%.*]] = load <1 x float>, ptr [[P:%.*]], align 16
; CHECK-NEXT:    ret <1 x float> [[R]]
;
  %r = load <1 x float>, ptr %p, align 16
  ret <1 x float> %r
}

; A <2 x float> load that is returned directly; the expected output keeps it
; as written.
define <2 x float> @vec_with_2elts(ptr align 16 dereferenceable(16) %p) {
; CHECK-LABEL: @vec_with_2elts(
; CHECK-NEXT:    [[R:%.*]] = load <2 x float>, ptr [[P:%.*]], align 16
; CHECK-NEXT:    ret <2 x float> [[R]]
;
  %r = load <2 x float>, ptr %p, align 16
  ret <2 x float> %r
}

; Same shape with three elements; the expected output keeps the <3 x float>
; load as written.
define <3 x float> @vec_with_3elts(ptr align 16 dereferenceable(16) %p) {
; CHECK-LABEL: @vec_with_3elts(
; CHECK-NEXT:    [[R:%.*]] = load <3 x float>, ptr [[P:%.*]], align 16
; CHECK-NEXT:    ret <3 x float> [[R]]
;
  %r = load <3 x float>, ptr %p, align 16
  ret <3 x float> %r
}

; Full-vector load. All good already.
define <4 x float> @vec_with_4elts(ptr align 16 dereferenceable(16) %p) {
; CHECK-LABEL: @vec_with_4elts(
; CHECK-NEXT:    [[R:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
; CHECK-NEXT:    ret <4 x float> [[R]]
;
  %r = load <4 x float>, ptr %p, align 16
  ret <4 x float> %r
}

; We don't know we can load 256 bits though.
; (Only 16 bytes are marked dereferenceable on the parameter.)
define <5 x float> @vec_with_5elts(ptr align 16 dereferenceable(16) %p) {
; CHECK-LABEL: @vec_with_5elts(
; CHECK-NEXT:    [[R:%.*]] = load <5 x float>, ptr [[P:%.*]], align 16
; CHECK-NEXT:    ret <5 x float> [[R]]
;
  %r = load <5 x float>, ptr %p, align 16
  ret <5 x float> %r
}

;-------------------------------------------------------------------------------

; We can load 128 bits, and the fact that it's underaligned isn't relevant.
define <3 x float> @vec_with_3elts_underaligned(ptr align 8 dereferenceable(16) %p) {
; CHECK-LABEL: @vec_with_3elts_underaligned(
; CHECK-NEXT:    [[R:%.*]] = load <3 x float>, ptr [[P:%.*]], align 8
; CHECK-NEXT:    ret <3 x float> [[R]]
;
  %r = load <3 x float>, ptr %p, align 8
  ret <3 x float> %r
}

; We don't know we can load 128 bits, but since it's aligned, we still can do wide load.
; FIXME: this should still get widened.
; (The parameter is align 16 but only dereferenceable(12); the expected output
; currently keeps the <3 x float> load.)
define <3 x float> @vec_with_3elts_underdereferenceable(ptr align 16 dereferenceable(12) %p) {
; CHECK-LABEL: @vec_with_3elts_underdereferenceable(
; CHECK-NEXT:    [[R:%.*]] = load <3 x float>, ptr [[P:%.*]], align 16
; CHECK-NEXT:    ret <3 x float> [[R]]
;
  %r = load <3 x float>, ptr %p, align 16
  ret <3 x float> %r
}

; We can't tell if we can load 128 bits.
; (Both weakened at once: align 8 and dereferenceable(12).)
define <3 x float> @vec_with_3elts_underaligned_underdereferenceable(ptr align 8 dereferenceable(12) %p) {
; CHECK-LABEL: @vec_with_3elts_underaligned_underdereferenceable(
; CHECK-NEXT:    [[R:%.*]] = load <3 x float>, ptr [[P:%.*]], align 8
; CHECK-NEXT:    ret <3 x float> [[R]]
;
  %r = load <3 x float>, ptr %p, align 8
  ret <3 x float> %r
}

;-------------------------------------------------------------------------------
; Here we know we can load 256 bits as per dereferenceability and alignment.
; Every function in this group takes a pointer marked align 32 and
; dereferenceable(32) and returns the loaded vector directly. In each case the
; expected output keeps the load at its original type.

define <1 x float> @vec_with_1elt_256bits(ptr align 32 dereferenceable(32) %p) {
; CHECK-LABEL: @vec_with_1elt_256bits(
; CHECK-NEXT:    [[R:%.*]] = load <1 x float>, ptr [[P:%.*]], align 32
; CHECK-NEXT:    ret <1 x float> [[R]]
;
  %r = load <1 x float>, ptr %p, align 32
  ret <1 x float> %r
}

define <2 x float> @vec_with_2elts_256bits(ptr align 32 dereferenceable(32) %p) {
; CHECK-LABEL: @vec_with_2elts_256bits(
; CHECK-NEXT:    [[R:%.*]] = load <2 x float>, ptr [[P:%.*]], align 32
; CHECK-NEXT:    ret <2 x float> [[R]]
;
  %r = load <2 x float>, ptr %p, align 32
  ret <2 x float> %r
}

define <3 x float> @vec_with_3elts_256bits(ptr align 32 dereferenceable(32) %p) {
; CHECK-LABEL: @vec_with_3elts_256bits(
; CHECK-NEXT:    [[R:%.*]] = load <3 x float>, ptr [[P:%.*]], align 32
; CHECK-NEXT:    ret <3 x float> [[R]]
;
  %r = load <3 x float>, ptr %p, align 32
  ret <3 x float> %r
}

define <4 x float> @vec_with_4elts_256bits(ptr align 32 dereferenceable(32) %p) {
; CHECK-LABEL: @vec_with_4elts_256bits(
; CHECK-NEXT:    [[R:%.*]] = load <4 x float>, ptr [[P:%.*]], align 32
; CHECK-NEXT:    ret <4 x float> [[R]]
;
  %r = load <4 x float>, ptr %p, align 32
  ret <4 x float> %r
}

define <5 x float> @vec_with_5elts_256bits(ptr align 32 dereferenceable(32) %p) {
; CHECK-LABEL: @vec_with_5elts_256bits(
; CHECK-NEXT:    [[R:%.*]] = load <5 x float>, ptr [[P:%.*]], align 32
; CHECK-NEXT:    ret <5 x float> [[R]]
;
  %r = load <5 x float>, ptr %p, align 32
  ret <5 x float> %r
}

define <6 x float> @vec_with_6elts_256bits(ptr align 32 dereferenceable(32) %p) {
; CHECK-LABEL: @vec_with_6elts_256bits(
; CHECK-NEXT:    [[R:%.*]] = load <6 x float>, ptr [[P:%.*]], align 32
; CHECK-NEXT:    ret <6 x float> [[R]]
;
  %r = load <6 x float>, ptr %p, align 32
  ret <6 x float> %r
}

define <7 x float> @vec_with_7elts_256bits(ptr align 32 dereferenceable(32) %p) {
; CHECK-LABEL: @vec_with_7elts_256bits(
; CHECK-NEXT:    [[R:%.*]] = load <7 x float>, ptr [[P:%.*]], align 32
; CHECK-NEXT:    ret <7 x float> [[R]]
;
  %r = load <7 x float>, ptr %p, align 32
  ret <7 x float> %r
}

; Full-vector load. All good already.
define <8 x float> @vec_with_8elts_256bits(ptr align 32 dereferenceable(32) %p) {
; CHECK-LABEL: @vec_with_8elts_256bits(
; CHECK-NEXT:    [[R:%.*]] = load <8 x float>, ptr [[P:%.*]], align 32
; CHECK-NEXT:    ret <8 x float> [[R]]
;
  %r = load <8 x float>, ptr %p, align 32
  ret <8 x float> %r
}

; We can't tell if we can load more than 256 bits.
define <9 x float> @vec_with_9elts_256bits(ptr align 32 dereferenceable(32) %p) {
; CHECK-LABEL: @vec_with_9elts_256bits(
; CHECK-NEXT:    [[R:%.*]] = load <9 x float>, ptr [[P:%.*]], align 32
; CHECK-NEXT:    ret <9 x float> [[R]]
;
  %r = load <9 x float>, ptr %p, align 32
  ret <9 x float> %r
}

;-------------------------------------------------------------------------------

; Weird types we don't deal with
; (i7, i9 and i24 elements, followed by a load from a non-default address space.)
define <2 x i7> @vec_with_two_subbyte_elts(ptr align 16 dereferenceable(16) %p) {
; CHECK-LABEL: @vec_with_two_subbyte_elts(
; CHECK-NEXT:    [[R:%.*]] = load <2 x i7>, ptr [[P:%.*]], align 16
; CHECK-NEXT:    ret <2 x i7> [[R]]
;
  %r = load <2 x i7>, ptr %p, align 16
  ret <2 x i7> %r
}

define <2 x i9> @vec_with_two_nonbyte_sized_elts(ptr align 16 dereferenceable(16) %p) {
; CHECK-LABEL: @vec_with_two_nonbyte_sized_elts(
; CHECK-NEXT:    [[R:%.*]] = load <2 x i9>, ptr [[P:%.*]], align 16
; CHECK-NEXT:    ret <2 x i9> [[R]]
;
  %r = load <2 x i9>, ptr %p, align 16
  ret <2 x i9> %r
}

define <2 x i24> @vec_with_two_nonpoweroftwo_sized_elts(ptr align 16 dereferenceable(16) %p) {
; CHECK-LABEL: @vec_with_two_nonpoweroftwo_sized_elts(
; CHECK-NEXT:    [[R:%.*]] = load <2 x i24>, ptr [[P:%.*]], align 16
; CHECK-NEXT:    ret <2 x i24> [[R]]
;
  %r = load <2 x i24>, ptr %p, align 16
  ret <2 x i24> %r
}

define <2 x float> @vec_with_2elts_addressspace(ptr addrspace(2) align 16 dereferenceable(16) %p) {
; CHECK-LABEL: @vec_with_2elts_addressspace(
; CHECK-NEXT:    [[R:%.*]] = load <2 x float>, ptr addrspace(2) [[P:%.*]], align 16
; CHECK-NEXT:    ret <2 x float> [[R]]
;
  %r = load <2 x float>, ptr addrspace(2) %p, align 16
  ret <2 x float> %r
}

;-------------------------------------------------------------------------------

; Widening these would change the legalized type, so leave them alone.

define <2 x i1> @vec_with_2elts_128bits_i1(ptr align 16 dereferenceable(16) %p) {
; CHECK-LABEL: @vec_with_2elts_128bits_i1(
; CHECK-NEXT:    [[R:%.*]] = load <2 x i1>, ptr [[P:%.*]], align 16
; CHECK-NEXT:    ret <2 x i1> [[R]]
;
  %r = load <2 x i1>, ptr %p, align 16
  ret <2 x i1> %r
}

define <2 x i2> @vec_with_2elts_128bits_i2(ptr align 16 dereferenceable(16) %p) {
; CHECK-LABEL: @vec_with_2elts_128bits_i2(
; CHECK-NEXT:    [[R:%.*]] = load <2 x i2>, ptr [[P:%.*]], align 16
; CHECK-NEXT:    ret <2 x i2> [[R]]
;
  %r = load <2 x i2>, ptr %p, align 16
  ret <2 x i2> %r
}

define <2 x i4> @vec_with_2elts_128bits_i4(ptr align 16 dereferenceable(16) %p) {
; CHECK-LABEL: @vec_with_2elts_128bits_i4(
; CHECK-NEXT:    [[R:%.*]] = load <2 x i4>, ptr [[P:%.*]], align 16
; CHECK-NEXT:    ret <2 x i4> [[R]]
;
  %r = load <2 x i4>, ptr %p, align 16
  ret <2 x i4> %r
}

; Load the 128-bit vector because there is no additional cost.
; The narrow load feeds a shuffle that places its elements in the low lanes of
; a <4 x float>; the expected output is a single <4 x float> load.
; Unused shuffle-mask lanes are spelled poison rather than the deprecated undef.
define <4 x float> @load_v1f32_v4f32(ptr dereferenceable(16) %p) {
; CHECK-LABEL: @load_v1f32_v4f32(
; CHECK-NEXT:    [[S:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
; CHECK-NEXT:    ret <4 x float> [[S]]
;
  %l = load <1 x float>, ptr %p, align 16
  %s = shufflevector <1 x float> %l, <1 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
  ret <4 x float> %s
}

; Load the 128-bit vector because there is no additional cost.
; Alignment is taken from param attr.
; (The input load is only align 1; the expected wide load is align 16.)

define <4 x float> @load_v2f32_v4f32(ptr align 16 dereferenceable(16) %p) {
; CHECK-LABEL: @load_v2f32_v4f32(
; CHECK-NEXT:    [[S:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
; CHECK-NEXT:    ret <4 x float> [[S]]
;
  %l = load <2 x float>, ptr %p, align 1
  %s = shufflevector <2 x float> %l, <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  ret <4 x float> %s
}

; Load the 128-bit vector because there is no additional cost.
; (No align attribute on the parameter: the expected wide load stays align 1.)

define <4 x float> @load_v3f32_v4f32(ptr dereferenceable(16) %p) {
; CHECK-LABEL: @load_v3f32_v4f32(
; CHECK-NEXT:    [[S:%.*]] = load <4 x float>, ptr [[P:%.*]], align 1
; CHECK-NEXT:    ret <4 x float> [[S]]
;
  %l = load <3 x float>, ptr %p, align 1
  %s = shufflevector <3 x float> %l, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  ret <4 x float> %s
}

; Negative test - the shuffle must be a simple subvector insert.
; The mask swaps elements 0 and 1, so the expected output keeps both the
; <3 x float> load and the shuffle.
; Unused shuffle-mask lanes are spelled poison rather than the deprecated undef.
define <4 x float> @load_v3f32_v4f32_wrong_mask(ptr dereferenceable(16) %p) {
; CHECK-LABEL: @load_v3f32_v4f32_wrong_mask(
; CHECK-NEXT:    [[L:%.*]] = load <3 x float>, ptr [[P:%.*]], align 1
; CHECK-NEXT:    [[S:%.*]] = shufflevector <3 x float> [[L]], <3 x float> poison, <4 x i32> <i32 1, i32 0, i32 2, i32 poison>
; CHECK-NEXT:    ret <4 x float> [[S]]
;
  %l = load <3 x float>, ptr %p, align 1
  %s = shufflevector <3 x float> %l, <3 x float> poison, <4 x i32> <i32 1, i32 0, i32 2, i32 poison>
  ret <4 x float> %s
}

; Negative test - must be dereferenceable to vector width.
; (dereferenceable(15) is one byte short of the 16 bytes of a <4 x float>.)

define <4 x float> @load_v3f32_v4f32_not_deref(ptr dereferenceable(15) %p) {
; CHECK-LABEL: @load_v3f32_v4f32_not_deref(
; CHECK-NEXT:    [[L:%.*]] = load <3 x float>, ptr [[P:%.*]], align 16
; CHECK-NEXT:    [[S:%.*]] = shufflevector <3 x float> [[L]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
; CHECK-NEXT:    ret <4 x float> [[S]]
;
  %l = load <3 x float>, ptr %p, align 16
  %s = shufflevector <3 x float> %l, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  ret <4 x float> %s
}

; Without AVX, the cost of loading 256-bits would be greater.
; Under the sse2 runs the narrow load and the shuffle are expected to remain;
; under the avx2 runs they are expected to become one <8 x float> load.
; Unused shuffle-mask lanes are spelled poison rather than the deprecated undef.
define <8 x float> @load_v2f32_v8f32(ptr dereferenceable(32) %p) {
; SSE-LABEL: @load_v2f32_v8f32(
; SSE-NEXT:    [[L:%.*]] = load <2 x float>, ptr [[P:%.*]], align 1
; SSE-NEXT:    [[S:%.*]] = shufflevector <2 x float> [[L]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; SSE-NEXT:    ret <8 x float> [[S]]
;
; AVX-LABEL: @load_v2f32_v8f32(
; AVX-NEXT:    [[S:%.*]] = load <8 x float>, ptr [[P:%.*]], align 1
; AVX-NEXT:    ret <8 x float> [[S]]
;
  %l = load <2 x float>, ptr %p, align 1
  %s = shufflevector <2 x float> %l, <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  ret <8 x float> %s
}

; Integer type is ok too.
; (Only element 0 of the loaded <2 x i32> is selected by the mask.)

define <4 x i32> @load_v2i32_v4i32(ptr dereferenceable(16) %p) {
; CHECK-LABEL: @load_v2i32_v4i32(
; CHECK-NEXT:    [[S:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %l = load <2 x i32>, ptr %p, align 1
  %s = shufflevector <2 x i32> %l, <2 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
  ret <4 x i32> %s
}

; TODO: We assumed the shuffle mask is canonical.
; (Mask index 2 refers to the second, poison, operand; the expected output
; currently keeps the narrow load and the shuffle.)

define <4 x i32> @load_v2i32_v4i32_non_canonical_mask(ptr dereferenceable(16) %p) {
; CHECK-LABEL: @load_v2i32_v4i32_non_canonical_mask(
; CHECK-NEXT:    [[L:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 1
; CHECK-NEXT:    [[S:%.*]] = shufflevector <2 x i32> [[L]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %l = load <2 x i32>, ptr %p, align 1
  %s = shufflevector <2 x i32> %l, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  ret <4 x i32> %s
}

; Allow non-canonical commuted shuffle.
; The loaded vector is the second shuffle operand here, and mask indices 2 and
; 3 select its two elements; the expected output is a single <4 x i32> load.
; Unused shuffle-mask lanes are spelled poison rather than the deprecated undef.
define <4 x i32> @load_v2i32_v4i32_non_canonical_mask_commute(ptr dereferenceable(16) %p) {
; CHECK-LABEL: @load_v2i32_v4i32_non_canonical_mask_commute(
; CHECK-NEXT:    [[S:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %l = load <2 x i32>, ptr %p, align 1
  %s = shufflevector <2 x i32> poison, <2 x i32> %l, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
  ret <4 x i32> %s
}

; The wide load must be in the same addrspace as the original load.
; (The expected output casts %p to addrspace(42) and loads <4 x i32> from the
; cast pointer with align 16, while the input load is align 4.)

define <4 x i32> @load_v2i32_v4i32_addrspacecast(ptr addrspace(5) align 16 dereferenceable(16) %p) {
; CHECK-LABEL: @load_v2i32_v4i32_addrspacecast(
; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[P:%.*]] to ptr addrspace(42)
; CHECK-NEXT:    [[S:%.*]] = load <4 x i32>, ptr addrspace(42) [[TMP1]], align 16
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %asc = addrspacecast ptr addrspace(5) %p to ptr addrspace(42)
  %l = load <2 x i32>, ptr addrspace(42) %asc, align 4
  %s = shufflevector <2 x i32> %l, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  ret <4 x i32> %s
}

; Negative-negative tests with msan, which should be OK with widening.

define <4 x float> @load_v1f32_v4f32_msan(ptr dereferenceable(16) %p) sanitize_memory {
; CHECK-LABEL: @load_v1f32_v4f32_msan(
; CHECK-NEXT:    [[S:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
; CHECK-NEXT:    ret <4 x float> [[S]]
;
  %l = load <1 x float>, ptr %p, align 16
  %s = shufflevector <1 x float> %l, <1 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
  ret <4 x float> %s
}

; Negative tests with sanitizers.
; Each function in this group carries sanitize_address, sanitize_hwaddress or
; sanitize_thread, and in each case the expected output keeps the narrow load
; and the shuffle.
; Unused shuffle-mask lanes are spelled poison rather than the deprecated undef.

define <4 x float> @load_v1f32_v4f32_asan(ptr dereferenceable(16) %p) sanitize_address {
; CHECK-LABEL: @load_v1f32_v4f32_asan(
; CHECK-NEXT:    [[L:%.*]] = load <1 x float>, ptr [[P:%.*]], align 16
; CHECK-NEXT:    [[S:%.*]] = shufflevector <1 x float> [[L]], <1 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    ret <4 x float> [[S]]
;
  %l = load <1 x float>, ptr %p, align 16
  %s = shufflevector <1 x float> %l, <1 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
  ret <4 x float> %s
}

define <4 x float> @load_v2f32_v4f32_hwasan(ptr align 16 dereferenceable(16) %p) sanitize_hwaddress {
; CHECK-LABEL: @load_v2f32_v4f32_hwasan(
; CHECK-NEXT:    [[L:%.*]] = load <2 x float>, ptr [[P:%.*]], align 1
; CHECK-NEXT:    [[S:%.*]] = shufflevector <2 x float> [[L]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT:    ret <4 x float> [[S]]
;
  %l = load <2 x float>, ptr %p, align 1
  %s = shufflevector <2 x float> %l, <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  ret <4 x float> %s
}

define <4 x float> @load_v3f32_v4f32_tsan(ptr dereferenceable(16) %p) sanitize_thread {
; CHECK-LABEL: @load_v3f32_v4f32_tsan(
; CHECK-NEXT:    [[L:%.*]] = load <3 x float>, ptr [[P:%.*]], align 1
; CHECK-NEXT:    [[S:%.*]] = shufflevector <3 x float> [[L]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
; CHECK-NEXT:    ret <4 x float> [[S]]
;
  %l = load <3 x float>, ptr %p, align 1
  %s = shufflevector <3 x float> %l, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  ret <4 x float> %s
}

; The same expectation applies to all four runs, including the avx2 ones.
define <8 x float> @load_v2f32_v8f32_hwasan(ptr dereferenceable(32) %p) sanitize_hwaddress {
; CHECK-LABEL: @load_v2f32_v8f32_hwasan(
; CHECK-NEXT:    [[L:%.*]] = load <2 x float>, ptr [[P:%.*]], align 1
; CHECK-NEXT:    [[S:%.*]] = shufflevector <2 x float> [[L]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    ret <8 x float> [[S]]
;
  %l = load <2 x float>, ptr %p, align 1
  %s = shufflevector <2 x float> %l, <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  ret <8 x float> %s
}

define <4 x i32> @load_v2i32_v4i32_asan(ptr dereferenceable(16) %p) sanitize_address {
; CHECK-LABEL: @load_v2i32_v4i32_asan(
; CHECK-NEXT:    [[L:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 1
; CHECK-NEXT:    [[S:%.*]] = shufflevector <2 x i32> [[L]], <2 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %l = load <2 x i32>, ptr %p, align 1
  %s = shufflevector <2 x i32> %l, <2 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
  ret <4 x i32> %s
}

define <4 x i32> @load_v2i32_v4i32_non_canonical_mask_commute_hwasan(ptr dereferenceable(16) %p) sanitize_hwaddress {
; CHECK-LABEL: @load_v2i32_v4i32_non_canonical_mask_commute_hwasan(
; CHECK-NEXT:    [[L:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 1
; CHECK-NEXT:    [[S:%.*]] = shufflevector <2 x i32> poison, <2 x i32> [[L]], <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
; CHECK-NEXT:    ret <4 x i32> [[S]]
;
  %l = load <2 x i32>, ptr %p, align 1
  %s = shufflevector <2 x i32> poison, <2 x i32> %l, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
  ret <4 x i32> %s
}