; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s

; Test SIMD loads and stores

target triple = "wasm32-unknown-unknown"

; ==============================================================================
; 16 x i8
; ==============================================================================
define <16 x i8> @load_v16i8(ptr %p) {
; CHECK-LABEL: load_v16i8:
; CHECK:         .functype load_v16i8 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <16 x i8>, ptr %p
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8(ptr %p) {
; CHECK-LABEL: load_splat_v16i8:
; CHECK:         .functype load_splat_v16i8 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load8_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load i8, ptr %p
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_with_folded_offset(ptr %p) {
; CHECK-LABEL: load_v16i8_with_folded_offset:
; CHECK:         .functype load_v16i8_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <16 x i8>, ptr %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_folded_offset(ptr %p) {
; CHECK-LABEL: load_splat_v16i8_with_folded_offset:
; CHECK:         .functype load_splat_v16i8_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load8_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %e = load i8, ptr %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_with_folded_gep_offset(ptr %p) {
; CHECK-LABEL: load_v16i8_with_folded_gep_offset:
; CHECK:         .functype load_v16i8_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, ptr %p, i32 1
  %v = load <16 x i8>, ptr %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_folded_gep_offset(ptr %p) {
; CHECK-LABEL: load_splat_v16i8_with_folded_gep_offset:
; CHECK:         .functype load_splat_v16i8_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load8_splat 1
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i8, ptr %p, i32 1
  %e = load i8, ptr %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_with_unfolded_gep_negative_offset(ptr %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, ptr %p, i32 -1
  %v = load <16 x i8>, ptr %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_unfolded_gep_negative_offset(ptr %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -1
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load8_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i8, ptr %p, i32 -1
  %e = load i8, ptr %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_offset:
; CHECK:         .functype load_v16i8_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <16 x i8>, ptr %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_offset:
; CHECK:         .functype load_splat_v16i8_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load8_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %e = load i8, ptr %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_v16i8_with_unfolded_gep_offset:
; CHECK:         .functype load_v16i8_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <16 x i8>, ptr %p, i32 1
  %v = load <16 x i8>, ptr %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v16i8_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load8_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i8, ptr %p, i32 1
  %e = load i8, ptr %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define <16 x i8> @load_v16i8_from_numeric_address() {
; CHECK-LABEL: load_v16i8_from_numeric_address:
; CHECK:         .functype load_v16i8_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  %v = load <16 x i8>, ptr %s
  ret <16 x i8> %v
}

define <16 x i8> @load_splat_v16i8_from_numeric_address() {
; CHECK-LABEL: load_splat_v16i8_from_numeric_address:
; CHECK:         .functype load_splat_v16i8_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load8_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  %e = load i8, ptr %s
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

@gv_v16i8 = global <16 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
define <16 x i8> @load_v16i8_from_global_address() {
; CHECK-LABEL: load_v16i8_from_global_address:
; CHECK:         .functype load_v16i8_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v16i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <16 x i8>, ptr @gv_v16i8
  ret <16 x i8> %v
}

@gv_i8 = global i8 42
define <16 x i8> @load_splat_v16i8_from_global_address() {
; CHECK-LABEL: load_splat_v16i8_from_global_address:
; CHECK:         .functype load_splat_v16i8_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load8_splat gv_i8
; CHECK-NEXT:    # fallthrough-return
  %e = load i8, ptr @gv_i8
  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %v2
}

define void @store_v16i8(<16 x i8> %v, ptr %p) {
; CHECK-LABEL: store_v16i8:
; CHECK:         .functype store_v16i8 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <16 x i8> %v , ptr %p
  ret void
}

define void @store_v16i8_with_folded_offset(<16 x i8> %v, ptr %p) {
; CHECK-LABEL: store_v16i8_with_folded_offset:
; CHECK:         .functype store_v16i8_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  store <16 x i8> %v , ptr %s
  ret void
}

define void @store_v16i8_with_folded_gep_offset(<16 x i8> %v, ptr %p) {
; CHECK-LABEL: store_v16i8_with_folded_gep_offset:
; CHECK:         .functype store_v16i8_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, ptr %p, i32 1
  store <16 x i8> %v , ptr %s
  ret void
}

define void @store_v16i8_with_unfolded_gep_negative_offset(<16 x i8> %v, ptr %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v16i8_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <16 x i8>, ptr %p, i32 -1
  store <16 x i8> %v , ptr %s
  ret void
}

define void @store_v16i8_with_unfolded_offset(<16 x i8> %v, ptr %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_offset:
; CHECK:         .functype store_v16i8_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  store <16 x i8> %v , ptr %s
  ret void
}

define void @store_v16i8_with_unfolded_gep_offset(<16 x i8> %v, ptr %p) {
; CHECK-LABEL: store_v16i8_with_unfolded_gep_offset:
; CHECK:         .functype store_v16i8_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <16 x i8>, ptr %p, i32 1
  store <16 x i8> %v , ptr %s
  ret void
}

define void @store_v16i8_to_numeric_address(<16 x i8> %v) {
; CHECK-LABEL: store_v16i8_to_numeric_address:
; CHECK:         .functype store_v16i8_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  store <16 x i8> %v , ptr %s
  ret void
}

define void @store_v16i8_to_global_address(<16 x i8> %v) {
; CHECK-LABEL: store_v16i8_to_global_address:
; CHECK:         .functype store_v16i8_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v16i8
; CHECK-NEXT:    # fallthrough-return
  store <16 x i8> %v , ptr @gv_v16i8
  ret void
}

; ==============================================================================
; 8 x i16
; ==============================================================================
define <8 x i16> @load_v8i16(ptr %p) {
; CHECK-LABEL: load_v8i16:
; CHECK:         .functype load_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i16>, ptr %p
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16(ptr %p) {
; CHECK-LABEL: load_splat_v8i16:
; CHECK:         .functype load_splat_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load16_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load i16, ptr %p
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16(ptr %p) {
; CHECK-LABEL: load_sext_v8i16:
; CHECK:         .functype load_sext_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, ptr %p
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16(ptr %p) {
; CHECK-LABEL: load_zext_v8i16:
; CHECK:         .functype load_zext_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, ptr %p
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16(ptr %p) {
; CHECK-LABEL: load_ext_v8i16:
; CHECK:         .functype load_ext_v8i16 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, ptr %p
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_folded_offset(ptr %p) {
; CHECK-LABEL: load_v8i16_with_folded_offset:
; CHECK:         .functype load_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <8 x i16>, ptr %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_folded_offset(ptr %p) {
; CHECK-LABEL: load_splat_v8i16_with_folded_offset:
; CHECK:         .functype load_splat_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load16_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %e = load i16, ptr %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_folded_offset(ptr %p) {
; CHECK-LABEL: load_sext_v8i16_with_folded_offset:
; CHECK:         .functype load_sext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_s 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <8 x i8>, ptr %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_folded_offset(ptr %p) {
; CHECK-LABEL: load_zext_v8i16_with_folded_offset:
; CHECK:         .functype load_zext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <8 x i8>, ptr %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_folded_offset(ptr %p) {
; CHECK-LABEL: load_ext_v8i16_with_folded_offset:
; CHECK:         .functype load_ext_v8i16_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_zero 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <8 x i8>, ptr %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_folded_gep_offset(ptr %p) {
; CHECK-LABEL: load_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, ptr %p, i32 1
  %v = load <8 x i16>, ptr %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_folded_gep_offset(ptr %p) {
; CHECK-LABEL: load_splat_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_splat_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load16_splat 2
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i16, ptr %p, i32 1
  %e = load i16, ptr %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_folded_gep_offset(ptr %p) {
; CHECK-LABEL: load_sext_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_sext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_s 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, ptr %p, i32 1
  %v = load <8 x i8>, ptr %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_folded_gep_offset(ptr %p) {
; CHECK-LABEL: load_zext_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_zext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.load8x8_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, ptr %p, i32 1
  %v = load <8 x i8>, ptr %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_folded_gep_offset(ptr %p) {
; CHECK-LABEL: load_ext_v8i16_with_folded_gep_offset:
; CHECK:         .functype load_ext_v8i16_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_zero 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, ptr %p, i32 1
  %v = load <8 x i8>, ptr %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_unfolded_gep_negative_offset(ptr %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, ptr %p, i32 -1
  %v = load <8 x i16>, ptr %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_unfolded_gep_negative_offset(ptr %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -2
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load16_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i16, ptr %p, i32 -1
  %e = load i16, ptr %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_unfolded_gep_negative_offset(ptr %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_sext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, ptr %p, i32 -1
  %v = load <8 x i8>, ptr %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_unfolded_gep_negative_offset(ptr %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_zext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, ptr %p, i32 -1
  %v = load <8 x i8>, ptr %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_unfolded_gep_negative_offset(ptr %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_ext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, ptr %p, i32 -1
  %v = load <8 x i8>, ptr %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_offset:
; CHECK:         .functype load_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <8 x i16>, ptr %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_offset:
; CHECK:         .functype load_splat_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load16_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %e = load i16, ptr %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_offset:
; CHECK:         .functype load_sext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <8 x i8>, ptr %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_offset:
; CHECK:         .functype load_zext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <8 x i8>, ptr %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_offset:
; CHECK:         .functype load_ext_v8i16_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <8 x i8>, ptr %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i16>, ptr %p, i32 1
  %v = load <8 x i16>, ptr %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 2
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load16_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i16, ptr %p, i32 1
  %e = load i16, ptr %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_sext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, ptr %p, i32 1
  %v = load <8 x i8>, ptr %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_zext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i16x8.load8x8_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, ptr %p, i32 1
  %v = load <8 x i8>, ptr %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype load_ext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, ptr %p, i32 1
  %v = load <8 x i8>, ptr %s
  ret <8 x i8> %v
}

define <8 x i16> @load_v8i16_from_numeric_address() {
; CHECK-LABEL: load_v8i16_from_numeric_address:
; CHECK:         .functype load_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  %v = load <8 x i16>, ptr %s
  ret <8 x i16> %v
}

define <8 x i16> @load_splat_v8i16_from_numeric_address() {
; CHECK-LABEL: load_splat_v8i16_from_numeric_address:
; CHECK:         .functype load_splat_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load16_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  %e = load i16, ptr %s
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

define <8 x i16> @load_sext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_sext_v8i16_from_numeric_address:
; CHECK:         .functype load_sext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_s 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  %v = load <8 x i8>, ptr %s
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_zext_v8i16_from_numeric_address:
; CHECK:         .functype load_zext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  %v = load <8 x i8>, ptr %s
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_from_numeric_address() {
; CHECK-LABEL: load_ext_v8i16_from_numeric_address:
; CHECK:         .functype load_ext_v8i16_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load64_zero 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  %v = load <8 x i8>, ptr %s
  ret <8 x i8> %v
}

@gv_v8i16 = global <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
define <8 x i16> @load_v8i16_from_global_address() {
; CHECK-LABEL: load_v8i16_from_global_address:
; CHECK:         .functype load_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v8i16
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i16>, ptr @gv_v8i16
  ret <8 x i16> %v
}

@gv_i16 = global i16 42
define <8 x i16> @load_splat_v8i16_from_global_address() {
; CHECK-LABEL: load_splat_v8i16_from_global_address:
; CHECK:         .functype load_splat_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load16_splat gv_i16
; CHECK-NEXT:    # fallthrough-return
  %e = load i16, ptr @gv_i16
  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %v2
}

@gv_v8i8 = global <8 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
define <8 x i16> @load_sext_v8i16_from_global_address() {
; CHECK-LABEL: load_sext_v8i16_from_global_address:
; CHECK:         .functype load_sext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_s gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, ptr @gv_v8i8
  %v2 = sext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i16> @load_zext_v8i16_from_global_address() {
; CHECK-LABEL: load_zext_v8i16_from_global_address:
; CHECK:         .functype load_zext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i16x8.load8x8_u gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, ptr @gv_v8i8
  %v2 = zext <8 x i8> %v to <8 x i16>
  ret <8 x i16> %v2
}

define <8 x i8> @load_ext_v8i16_from_global_address() {
; CHECK-LABEL: load_ext_v8i16_from_global_address:
; CHECK:         .functype load_ext_v8i16_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load64_zero gv_v8i8
; CHECK-NEXT:    # fallthrough-return
  %v = load <8 x i8>, ptr @gv_v8i8
  ret <8 x i8> %v
}


define void @store_v8i16(<8 x i16> %v, ptr %p) {
; CHECK-LABEL: store_v8i16:
; CHECK:         .functype store_v8i16 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <8 x i16> %v , ptr %p
  ret void
}

define void @store_narrowing_v8i16(<8 x i8> %v, ptr %p) {
; CHECK-LABEL: store_narrowing_v8i16:
; CHECK:         .functype store_narrowing_v8i16 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  store <8 x i8> %v, ptr %p
  ret void
}

define void @store_v8i16_with_folded_offset(<8 x i16> %v, ptr %p) {
; CHECK-LABEL: store_v8i16_with_folded_offset:
; CHECK:         .functype store_v8i16_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  store <8 x i16> %v , ptr %s
  ret void
}

define void @store_narrowing_v8i16_with_folded_offset(<8 x i8> %v, ptr %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_folded_offset:
; CHECK:         .functype store_narrowing_v8i16_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 16, 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  store <8 x i8> %v , ptr %s
  ret void
}

define void @store_v8i16_with_folded_gep_offset(<8 x i16> %v, ptr %p) {
; CHECK-LABEL: store_v8i16_with_folded_gep_offset:
; CHECK:         .functype store_v8i16_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, ptr %p, i32 1
  store <8 x i16> %v , ptr %s
  ret void
}

define void @store_narrowing_v8i16_with_folded_gep_offset(<8 x i8> %v, ptr %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_folded_gep_offset:
; CHECK:         .functype store_narrowing_v8i16_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 8, 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, ptr %p, i32 1
  store <8 x i8> %v , ptr %s
  ret void
}

define void @store_v8i16_with_unfolded_gep_negative_offset(<8 x i16> %v, ptr %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, ptr %p, i32 -1
  store <8 x i16> %v , ptr %s
  ret void
}

define void @store_narrowing_v8i16_with_unfolded_gep_negative_offset(<8 x i8> %v, ptr %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_narrowing_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, ptr %p, i32 -1
  store <8 x i8> %v , ptr %s
  ret void
}

define void @store_v8i16_with_unfolded_offset(<8 x i16> %v, ptr %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_offset:
; CHECK:         .functype store_v8i16_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  store <8 x i16> %v , ptr %s
  ret void
}

define void @store_narrowing_v8i16_with_unfolded_offset(<8 x i8> %v, ptr %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_offset:
; CHECK:         .functype store_narrowing_v8i16_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  store <8 x i8> %v , ptr %s
  ret void
}

define void @store_v8i16_with_unfolded_gep_offset(<8 x i16> %v, ptr %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype store_v8i16_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; 
CHECK-NEXT: local.get 0 1060; CHECK-NEXT: v128.store 0 1061; CHECK-NEXT: # fallthrough-return 1062 %s = getelementptr <8 x i16>, ptr %p, i32 1 1063 store <8 x i16> %v , ptr %s 1064 ret void 1065} 1066 1067define void @store_narrowing_v8i16_with_unfolded_gep_offset(<8 x i8> %v, ptr %p) { 1068; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_gep_offset: 1069; CHECK: .functype store_narrowing_v8i16_with_unfolded_gep_offset (v128, i32) -> () 1070; CHECK-NEXT: # %bb.0: 1071; CHECK-NEXT: local.get 1 1072; CHECK-NEXT: i32.const 8 1073; CHECK-NEXT: i32.add 1074; CHECK-NEXT: local.get 0 1075; CHECK-NEXT: v128.store64_lane 0, 0 1076; CHECK-NEXT: # fallthrough-return 1077 %s = getelementptr <8 x i8>, ptr %p, i32 1 1078 store <8 x i8> %v , ptr %s 1079 ret void 1080} 1081 1082define void @store_v8i16_to_numeric_address(<8 x i16> %v) { 1083; CHECK-LABEL: store_v8i16_to_numeric_address: 1084; CHECK: .functype store_v8i16_to_numeric_address (v128) -> () 1085; CHECK-NEXT: # %bb.0: 1086; CHECK-NEXT: i32.const 0 1087; CHECK-NEXT: local.get 0 1088; CHECK-NEXT: v128.store 32 1089; CHECK-NEXT: # fallthrough-return 1090 %s = inttoptr i32 32 to ptr 1091 store <8 x i16> %v , ptr %s 1092 ret void 1093} 1094 1095define void @store_narrowing_v8i16_to_numeric_address(<8 x i8> %v, ptr %p) { 1096; CHECK-LABEL: store_narrowing_v8i16_to_numeric_address: 1097; CHECK: .functype store_narrowing_v8i16_to_numeric_address (v128, i32) -> () 1098; CHECK-NEXT: # %bb.0: 1099; CHECK-NEXT: i32.const 0 1100; CHECK-NEXT: local.get 0 1101; CHECK-NEXT: v128.store64_lane 32, 0 1102; CHECK-NEXT: # fallthrough-return 1103 %s = inttoptr i32 32 to ptr 1104 store <8 x i8> %v , ptr %s 1105 ret void 1106} 1107 1108define void @store_v8i16_to_global_address(<8 x i16> %v) { 1109; CHECK-LABEL: store_v8i16_to_global_address: 1110; CHECK: .functype store_v8i16_to_global_address (v128) -> () 1111; CHECK-NEXT: # %bb.0: 1112; CHECK-NEXT: i32.const 0 1113; CHECK-NEXT: local.get 0 1114; CHECK-NEXT: v128.store gv_v8i16 1115; 
CHECK-NEXT: # fallthrough-return 1116 store <8 x i16> %v , ptr @gv_v8i16 1117 ret void 1118} 1119 1120define void @store_narrowing_v8i16_to_global_address(<8 x i8> %v) { 1121; CHECK-LABEL: store_narrowing_v8i16_to_global_address: 1122; CHECK: .functype store_narrowing_v8i16_to_global_address (v128) -> () 1123; CHECK-NEXT: # %bb.0: 1124; CHECK-NEXT: i32.const 0 1125; CHECK-NEXT: local.get 0 1126; CHECK-NEXT: v128.store64_lane gv_v8i8, 0 1127; CHECK-NEXT: # fallthrough-return 1128 store <8 x i8> %v , ptr @gv_v8i8 1129 ret void 1130} 1131 1132; ============================================================================== 1133; 4 x i32 1134; ============================================================================== 1135define <4 x i32> @load_v4i32(ptr %p) { 1136; CHECK-LABEL: load_v4i32: 1137; CHECK: .functype load_v4i32 (i32) -> (v128) 1138; CHECK-NEXT: # %bb.0: 1139; CHECK-NEXT: local.get 0 1140; CHECK-NEXT: v128.load 0 1141; CHECK-NEXT: # fallthrough-return 1142 %v = load <4 x i32>, ptr %p 1143 ret <4 x i32> %v 1144} 1145 1146define <4 x i32> @load_splat_v4i32(ptr %addr) { 1147; CHECK-LABEL: load_splat_v4i32: 1148; CHECK: .functype load_splat_v4i32 (i32) -> (v128) 1149; CHECK-NEXT: # %bb.0: 1150; CHECK-NEXT: local.get 0 1151; CHECK-NEXT: v128.load32_splat 0 1152; CHECK-NEXT: # fallthrough-return 1153 %e = load i32, ptr %addr, align 4 1154 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1155 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1156 ret <4 x i32> %v2 1157} 1158 1159define <4 x i32> @load_sext_v4i16_to_v4i32(ptr %p) { 1160; CHECK-LABEL: load_sext_v4i16_to_v4i32: 1161; CHECK: .functype load_sext_v4i16_to_v4i32 (i32) -> (v128) 1162; CHECK-NEXT: # %bb.0: 1163; CHECK-NEXT: local.get 0 1164; CHECK-NEXT: i32x4.load16x4_s 0 1165; CHECK-NEXT: # fallthrough-return 1166 %v = load <4 x i16>, ptr %p 1167 %v2 = sext <4 x i16> %v to <4 x i32> 1168 ret <4 x i32> %v2 1169} 1170 1171define <4 x i32> @load_zext_v4i16_to_v4i32(ptr %p) { 
1172; CHECK-LABEL: load_zext_v4i16_to_v4i32: 1173; CHECK: .functype load_zext_v4i16_to_v4i32 (i32) -> (v128) 1174; CHECK-NEXT: # %bb.0: 1175; CHECK-NEXT: local.get 0 1176; CHECK-NEXT: i32x4.load16x4_u 0 1177; CHECK-NEXT: # fallthrough-return 1178 %v = load <4 x i16>, ptr %p 1179 %v2 = zext <4 x i16> %v to <4 x i32> 1180 ret <4 x i32> %v2 1181} 1182 1183define <4 x i32> @load_sext_v4i8_to_v4i32(ptr %p) { 1184; CHECK-LABEL: load_sext_v4i8_to_v4i32: 1185; CHECK: .functype load_sext_v4i8_to_v4i32 (i32) -> (v128) 1186; CHECK-NEXT: # %bb.0: 1187; CHECK-NEXT: local.get 0 1188; CHECK-NEXT: v128.load32_zero 0 1189; CHECK-NEXT: i16x8.extend_low_i8x16_s 1190; CHECK-NEXT: i32x4.extend_low_i16x8_s 1191; CHECK-NEXT: # fallthrough-return 1192 %v = load <4 x i8>, ptr %p 1193 %v2 = sext <4 x i8> %v to <4 x i32> 1194 ret <4 x i32> %v2 1195} 1196 1197define <4 x i32> @load_zext_v4i8_to_v4i32(ptr %p) { 1198; CHECK-LABEL: load_zext_v4i8_to_v4i32: 1199; CHECK: .functype load_zext_v4i8_to_v4i32 (i32) -> (v128) 1200; CHECK-NEXT: # %bb.0: 1201; CHECK-NEXT: local.get 0 1202; CHECK-NEXT: v128.load32_zero 0 1203; CHECK-NEXT: i16x8.extend_low_i8x16_u 1204; CHECK-NEXT: i32x4.extend_low_i16x8_u 1205; CHECK-NEXT: # fallthrough-return 1206 %v = load <4 x i8>, ptr %p 1207 %v2 = zext <4 x i8> %v to <4 x i32> 1208 ret <4 x i32> %v2 1209} 1210 1211define <4 x i16> @load_ext_v4i32(ptr %p) { 1212; CHECK-LABEL: load_ext_v4i32: 1213; CHECK: .functype load_ext_v4i32 (i32) -> (v128) 1214; CHECK-NEXT: # %bb.0: 1215; CHECK-NEXT: local.get 0 1216; CHECK-NEXT: v128.load64_zero 0 1217; CHECK-NEXT: # fallthrough-return 1218 %v = load <4 x i16>, ptr %p 1219 ret <4 x i16> %v 1220} 1221 1222define <4 x i32> @load_v4i32_with_folded_offset(ptr %p) { 1223; CHECK-LABEL: load_v4i32_with_folded_offset: 1224; CHECK: .functype load_v4i32_with_folded_offset (i32) -> (v128) 1225; CHECK-NEXT: # %bb.0: 1226; CHECK-NEXT: local.get 0 1227; CHECK-NEXT: v128.load 16 1228; CHECK-NEXT: # fallthrough-return 1229 %q = ptrtoint ptr %p 
to i32 1230 %r = add nuw i32 %q, 16 1231 %s = inttoptr i32 %r to ptr 1232 %v = load <4 x i32>, ptr %s 1233 ret <4 x i32> %v 1234} 1235 1236define <4 x i32> @load_splat_v4i32_with_folded_offset(ptr %p) { 1237; CHECK-LABEL: load_splat_v4i32_with_folded_offset: 1238; CHECK: .functype load_splat_v4i32_with_folded_offset (i32) -> (v128) 1239; CHECK-NEXT: # %bb.0: 1240; CHECK-NEXT: local.get 0 1241; CHECK-NEXT: v128.load32_splat 16 1242; CHECK-NEXT: # fallthrough-return 1243 %q = ptrtoint ptr %p to i32 1244 %r = add nuw i32 %q, 16 1245 %s = inttoptr i32 %r to ptr 1246 %e = load i32, ptr %s 1247 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1248 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1249 ret <4 x i32> %v2 1250} 1251 1252define <4 x i32> @load_sext_v4i16_to_v4i32_with_folded_offset(ptr %p) { 1253; CHECK-LABEL: load_sext_v4i16_to_v4i32_with_folded_offset: 1254; CHECK: .functype load_sext_v4i16_to_v4i32_with_folded_offset (i32) -> (v128) 1255; CHECK-NEXT: # %bb.0: 1256; CHECK-NEXT: local.get 0 1257; CHECK-NEXT: i32x4.load16x4_s 16 1258; CHECK-NEXT: # fallthrough-return 1259 %q = ptrtoint ptr %p to i32 1260 %r = add nuw i32 %q, 16 1261 %s = inttoptr i32 %r to ptr 1262 %v = load <4 x i16>, ptr %s 1263 %v2 = sext <4 x i16> %v to <4 x i32> 1264 ret <4 x i32> %v2 1265} 1266 1267define <4 x i32> @load_zext_v4i16_to_v4i32_with_folded_offset(ptr %p) { 1268; CHECK-LABEL: load_zext_v4i16_to_v4i32_with_folded_offset: 1269; CHECK: .functype load_zext_v4i16_to_v4i32_with_folded_offset (i32) -> (v128) 1270; CHECK-NEXT: # %bb.0: 1271; CHECK-NEXT: local.get 0 1272; CHECK-NEXT: i32x4.load16x4_u 16 1273; CHECK-NEXT: # fallthrough-return 1274 %q = ptrtoint ptr %p to i32 1275 %r = add nuw i32 %q, 16 1276 %s = inttoptr i32 %r to ptr 1277 %v = load <4 x i16>, ptr %s 1278 %v2 = zext <4 x i16> %v to <4 x i32> 1279 ret <4 x i32> %v2 1280} 1281 1282define <4 x i32> @load_sext_v4i8_to_v4i32_with_folded_offset(ptr %p) { 1283; CHECK-LABEL: 
load_sext_v4i8_to_v4i32_with_folded_offset: 1284; CHECK: .functype load_sext_v4i8_to_v4i32_with_folded_offset (i32) -> (v128) 1285; CHECK-NEXT: # %bb.0: 1286; CHECK-NEXT: local.get 0 1287; CHECK-NEXT: v128.load32_zero 16 1288; CHECK-NEXT: i16x8.extend_low_i8x16_s 1289; CHECK-NEXT: i32x4.extend_low_i16x8_s 1290; CHECK-NEXT: # fallthrough-return 1291 %q = ptrtoint ptr %p to i32 1292 %r = add nuw i32 %q, 16 1293 %s = inttoptr i32 %r to ptr 1294 %v = load <4 x i8>, ptr %s 1295 %v2 = sext <4 x i8> %v to <4 x i32> 1296 ret <4 x i32> %v2 1297} 1298 1299define <4 x i32> @load_zext_v4i8_to_v4i32_with_folded_offset(ptr %p) { 1300; CHECK-LABEL: load_zext_v4i8_to_v4i32_with_folded_offset: 1301; CHECK: .functype load_zext_v4i8_to_v4i32_with_folded_offset (i32) -> (v128) 1302; CHECK-NEXT: # %bb.0: 1303; CHECK-NEXT: local.get 0 1304; CHECK-NEXT: v128.load32_zero 16 1305; CHECK-NEXT: i16x8.extend_low_i8x16_u 1306; CHECK-NEXT: i32x4.extend_low_i16x8_u 1307; CHECK-NEXT: # fallthrough-return 1308 %q = ptrtoint ptr %p to i32 1309 %r = add nuw i32 %q, 16 1310 %s = inttoptr i32 %r to ptr 1311 %v = load <4 x i8>, ptr %s 1312 %v2 = zext <4 x i8> %v to <4 x i32> 1313 ret <4 x i32> %v2 1314} 1315 1316define <4 x i16> @load_ext_v4i32_with_folded_offset(ptr %p) { 1317; CHECK-LABEL: load_ext_v4i32_with_folded_offset: 1318; CHECK: .functype load_ext_v4i32_with_folded_offset (i32) -> (v128) 1319; CHECK-NEXT: # %bb.0: 1320; CHECK-NEXT: local.get 0 1321; CHECK-NEXT: v128.load64_zero 16 1322; CHECK-NEXT: # fallthrough-return 1323 %q = ptrtoint ptr %p to i32 1324 %r = add nuw i32 %q, 16 1325 %s = inttoptr i32 %r to ptr 1326 %v = load <4 x i16>, ptr %s 1327 ret <4 x i16> %v 1328} 1329 1330define <4 x i32> @load_v4i32_with_folded_gep_offset(ptr %p) { 1331; CHECK-LABEL: load_v4i32_with_folded_gep_offset: 1332; CHECK: .functype load_v4i32_with_folded_gep_offset (i32) -> (v128) 1333; CHECK-NEXT: # %bb.0: 1334; CHECK-NEXT: local.get 0 1335; CHECK-NEXT: v128.load 16 1336; CHECK-NEXT: # fallthrough-return 
1337 %s = getelementptr inbounds <4 x i32>, ptr %p, i32 1 1338 %v = load <4 x i32>, ptr %s 1339 ret <4 x i32> %v 1340} 1341 1342define <4 x i32> @load_splat_v4i32_with_folded_gep_offset(ptr %p) { 1343; CHECK-LABEL: load_splat_v4i32_with_folded_gep_offset: 1344; CHECK: .functype load_splat_v4i32_with_folded_gep_offset (i32) -> (v128) 1345; CHECK-NEXT: # %bb.0: 1346; CHECK-NEXT: local.get 0 1347; CHECK-NEXT: v128.load32_splat 4 1348; CHECK-NEXT: # fallthrough-return 1349 %s = getelementptr inbounds i32, ptr %p, i32 1 1350 %e = load i32, ptr %s 1351 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1352 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1353 ret <4 x i32> %v2 1354} 1355 1356define <4 x i32> @load_sext_v4i16_to_v4i32_with_folded_gep_offset(ptr %p) { 1357; CHECK-LABEL: load_sext_v4i16_to_v4i32_with_folded_gep_offset: 1358; CHECK: .functype load_sext_v4i16_to_v4i32_with_folded_gep_offset (i32) -> (v128) 1359; CHECK-NEXT: # %bb.0: 1360; CHECK-NEXT: local.get 0 1361; CHECK-NEXT: i32x4.load16x4_s 8 1362; CHECK-NEXT: # fallthrough-return 1363 %s = getelementptr inbounds <4 x i16>, ptr %p, i32 1 1364 %v = load <4 x i16>, ptr %s 1365 %v2 = sext <4 x i16> %v to <4 x i32> 1366 ret <4 x i32> %v2 1367} 1368 1369define <4 x i32> @load_zext_v4i16_to_v4i32_with_folded_gep_offset(ptr %p) { 1370; CHECK-LABEL: load_zext_v4i16_to_v4i32_with_folded_gep_offset: 1371; CHECK: .functype load_zext_v4i16_to_v4i32_with_folded_gep_offset (i32) -> (v128) 1372; CHECK-NEXT: # %bb.0: 1373; CHECK-NEXT: local.get 0 1374; CHECK-NEXT: i32x4.load16x4_u 8 1375; CHECK-NEXT: # fallthrough-return 1376 %s = getelementptr inbounds <4 x i16>, ptr %p, i32 1 1377 %v = load <4 x i16>, ptr %s 1378 %v2 = zext <4 x i16> %v to <4 x i32> 1379 ret <4 x i32> %v2 1380} 1381 1382define <4 x i32> @load_sext_v4i8_to_v4i32_with_folded_gep_offset(ptr %p) { 1383; CHECK-LABEL: load_sext_v4i8_to_v4i32_with_folded_gep_offset: 1384; CHECK: .functype 
load_sext_v4i8_to_v4i32_with_folded_gep_offset (i32) -> (v128) 1385; CHECK-NEXT: # %bb.0: 1386; CHECK-NEXT: local.get 0 1387; CHECK-NEXT: v128.load32_zero 4 1388; CHECK-NEXT: i16x8.extend_low_i8x16_s 1389; CHECK-NEXT: i32x4.extend_low_i16x8_s 1390; CHECK-NEXT: # fallthrough-return 1391 %s = getelementptr inbounds <4 x i8>, ptr %p, i32 1 1392 %v = load <4 x i8>, ptr %s 1393 %v2 = sext <4 x i8> %v to <4 x i32> 1394 ret <4 x i32> %v2 1395} 1396 1397define <4 x i32> @load_zext_v4i8_to_v4i32_with_folded_gep_offset(ptr %p) { 1398; CHECK-LABEL: load_zext_v4i8_to_v4i32_with_folded_gep_offset: 1399; CHECK: .functype load_zext_v4i8_to_v4i32_with_folded_gep_offset (i32) -> (v128) 1400; CHECK-NEXT: # %bb.0: 1401; CHECK-NEXT: local.get 0 1402; CHECK-NEXT: v128.load32_zero 4 1403; CHECK-NEXT: i16x8.extend_low_i8x16_u 1404; CHECK-NEXT: i32x4.extend_low_i16x8_u 1405; CHECK-NEXT: # fallthrough-return 1406 %s = getelementptr inbounds <4 x i8>, ptr %p, i32 1 1407 %v = load <4 x i8>, ptr %s 1408 %v2 = zext <4 x i8> %v to <4 x i32> 1409 ret <4 x i32> %v2 1410} 1411 1412define <4 x i16> @load_ext_v4i32_with_folded_gep_offset(ptr %p) { 1413; CHECK-LABEL: load_ext_v4i32_with_folded_gep_offset: 1414; CHECK: .functype load_ext_v4i32_with_folded_gep_offset (i32) -> (v128) 1415; CHECK-NEXT: # %bb.0: 1416; CHECK-NEXT: local.get 0 1417; CHECK-NEXT: v128.load64_zero 8 1418; CHECK-NEXT: # fallthrough-return 1419 %s = getelementptr inbounds <4 x i16>, ptr %p, i32 1 1420 %v = load <4 x i16>, ptr %s 1421 ret <4 x i16> %v 1422} 1423 1424define <4 x i32> @load_v4i32_with_unfolded_gep_negative_offset(ptr %p) { 1425; CHECK-LABEL: load_v4i32_with_unfolded_gep_negative_offset: 1426; CHECK: .functype load_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) 1427; CHECK-NEXT: # %bb.0: 1428; CHECK-NEXT: local.get 0 1429; CHECK-NEXT: i32.const -16 1430; CHECK-NEXT: i32.add 1431; CHECK-NEXT: v128.load 0 1432; CHECK-NEXT: # fallthrough-return 1433 %s = getelementptr inbounds <4 x i32>, ptr %p, i32 -1 1434 %v 
= load <4 x i32>, ptr %s 1435 ret <4 x i32> %v 1436} 1437 1438define <4 x i32> @load_splat_v4i32_with_unfolded_gep_negative_offset(ptr %p) { 1439; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_negative_offset: 1440; CHECK: .functype load_splat_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) 1441; CHECK-NEXT: # %bb.0: 1442; CHECK-NEXT: local.get 0 1443; CHECK-NEXT: i32.const -4 1444; CHECK-NEXT: i32.add 1445; CHECK-NEXT: v128.load32_splat 0 1446; CHECK-NEXT: # fallthrough-return 1447 %s = getelementptr inbounds i32, ptr %p, i32 -1 1448 %e = load i32, ptr %s 1449 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1450 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1451 ret <4 x i32> %v2 1452} 1453 1454define <4 x i32> @load_sext_v4i16_to_v4i32_with_unfolded_gep_negative_offset(ptr %p) { 1455; CHECK-LABEL: load_sext_v4i16_to_v4i32_with_unfolded_gep_negative_offset: 1456; CHECK: .functype load_sext_v4i16_to_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) 1457; CHECK-NEXT: # %bb.0: 1458; CHECK-NEXT: local.get 0 1459; CHECK-NEXT: i32.const -8 1460; CHECK-NEXT: i32.add 1461; CHECK-NEXT: i32x4.load16x4_s 0 1462; CHECK-NEXT: # fallthrough-return 1463 %s = getelementptr inbounds <4 x i16>, ptr %p, i32 -1 1464 %v = load <4 x i16>, ptr %s 1465 %v2 = sext <4 x i16> %v to <4 x i32> 1466 ret <4 x i32> %v2 1467} 1468 1469define <4 x i32> @load_zext_v4i16_to_v4i32_with_unfolded_gep_negative_offset(ptr %p) { 1470; CHECK-LABEL: load_zext_v4i16_to_v4i32_with_unfolded_gep_negative_offset: 1471; CHECK: .functype load_zext_v4i16_to_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) 1472; CHECK-NEXT: # %bb.0: 1473; CHECK-NEXT: local.get 0 1474; CHECK-NEXT: i32.const -8 1475; CHECK-NEXT: i32.add 1476; CHECK-NEXT: i32x4.load16x4_u 0 1477; CHECK-NEXT: # fallthrough-return 1478 %s = getelementptr inbounds <4 x i16>, ptr %p, i32 -1 1479 %v = load <4 x i16>, ptr %s 1480 %v2 = zext <4 x i16> %v to <4 x i32> 1481 ret <4 x i32> %v2 1482} 1483 
1484define <4 x i32> @load_sext_v4i8_to_v4i32_with_unfolded_gep_negative_offset(ptr %p) { 1485; CHECK-LABEL: load_sext_v4i8_to_v4i32_with_unfolded_gep_negative_offset: 1486; CHECK: .functype load_sext_v4i8_to_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) 1487; CHECK-NEXT: # %bb.0: 1488; CHECK-NEXT: local.get 0 1489; CHECK-NEXT: i32.const -4 1490; CHECK-NEXT: i32.add 1491; CHECK-NEXT: v128.load32_zero 0 1492; CHECK-NEXT: i16x8.extend_low_i8x16_s 1493; CHECK-NEXT: i32x4.extend_low_i16x8_s 1494; CHECK-NEXT: # fallthrough-return 1495 %s = getelementptr inbounds <4 x i8>, ptr %p, i32 -1 1496 %v = load <4 x i8>, ptr %s 1497 %v2 = sext <4 x i8> %v to <4 x i32> 1498 ret <4 x i32> %v2 1499} 1500 1501define <4 x i32> @load_zext_v4i8_to_v4i32_with_unfolded_gep_negative_offset(ptr %p) { 1502; CHECK-LABEL: load_zext_v4i8_to_v4i32_with_unfolded_gep_negative_offset: 1503; CHECK: .functype load_zext_v4i8_to_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) 1504; CHECK-NEXT: # %bb.0: 1505; CHECK-NEXT: local.get 0 1506; CHECK-NEXT: i32.const -4 1507; CHECK-NEXT: i32.add 1508; CHECK-NEXT: v128.load32_zero 0 1509; CHECK-NEXT: i16x8.extend_low_i8x16_u 1510; CHECK-NEXT: i32x4.extend_low_i16x8_u 1511; CHECK-NEXT: # fallthrough-return 1512 %s = getelementptr inbounds <4 x i8>, ptr %p, i32 -1 1513 %v = load <4 x i8>, ptr %s 1514 %v2 = zext <4 x i8> %v to <4 x i32> 1515 ret <4 x i32> %v2 1516} 1517 1518define <4 x i16> @load_ext_v4i32_with_unfolded_gep_negative_offset(ptr %p) { 1519; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_negative_offset: 1520; CHECK: .functype load_ext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) 1521; CHECK-NEXT: # %bb.0: 1522; CHECK-NEXT: local.get 0 1523; CHECK-NEXT: i32.const -8 1524; CHECK-NEXT: i32.add 1525; CHECK-NEXT: v128.load64_zero 0 1526; CHECK-NEXT: # fallthrough-return 1527 %s = getelementptr inbounds <4 x i16>, ptr %p, i32 -1 1528 %v = load <4 x i16>, ptr %s 1529 ret <4 x i16> %v 1530} 1531 1532define <4 x i32> 
@load_v4i32_with_unfolded_offset(ptr %p) { 1533; CHECK-LABEL: load_v4i32_with_unfolded_offset: 1534; CHECK: .functype load_v4i32_with_unfolded_offset (i32) -> (v128) 1535; CHECK-NEXT: # %bb.0: 1536; CHECK-NEXT: local.get 0 1537; CHECK-NEXT: i32.const 16 1538; CHECK-NEXT: i32.add 1539; CHECK-NEXT: v128.load 0 1540; CHECK-NEXT: # fallthrough-return 1541 %q = ptrtoint ptr %p to i32 1542 %r = add nsw i32 %q, 16 1543 %s = inttoptr i32 %r to ptr 1544 %v = load <4 x i32>, ptr %s 1545 ret <4 x i32> %v 1546} 1547 1548define <4 x i32> @load_splat_v4i32_with_unfolded_offset(ptr %p) { 1549; CHECK-LABEL: load_splat_v4i32_with_unfolded_offset: 1550; CHECK: .functype load_splat_v4i32_with_unfolded_offset (i32) -> (v128) 1551; CHECK-NEXT: # %bb.0: 1552; CHECK-NEXT: local.get 0 1553; CHECK-NEXT: i32.const 16 1554; CHECK-NEXT: i32.add 1555; CHECK-NEXT: v128.load32_splat 0 1556; CHECK-NEXT: # fallthrough-return 1557 %q = ptrtoint ptr %p to i32 1558 %r = add nsw i32 %q, 16 1559 %s = inttoptr i32 %r to ptr 1560 %e = load i32, ptr %s 1561 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1562 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1563 ret <4 x i32> %v2 1564} 1565 1566define <4 x i32> @load_sext_v4i16_to_v4i32_with_unfolded_offset(ptr %p) { 1567; CHECK-LABEL: load_sext_v4i16_to_v4i32_with_unfolded_offset: 1568; CHECK: .functype load_sext_v4i16_to_v4i32_with_unfolded_offset (i32) -> (v128) 1569; CHECK-NEXT: # %bb.0: 1570; CHECK-NEXT: local.get 0 1571; CHECK-NEXT: i32.const 16 1572; CHECK-NEXT: i32.add 1573; CHECK-NEXT: i32x4.load16x4_s 0 1574; CHECK-NEXT: # fallthrough-return 1575 %q = ptrtoint ptr %p to i32 1576 %r = add nsw i32 %q, 16 1577 %s = inttoptr i32 %r to ptr 1578 %v = load <4 x i16>, ptr %s 1579 %v2 = sext <4 x i16> %v to <4 x i32> 1580 ret <4 x i32> %v2 1581} 1582 1583define <4 x i32> @load_zext_v4i16_to_v4i32_with_unfolded_offset(ptr %p) { 1584; CHECK-LABEL: load_zext_v4i16_to_v4i32_with_unfolded_offset: 1585; CHECK: .functype 
load_zext_v4i16_to_v4i32_with_unfolded_offset (i32) -> (v128) 1586; CHECK-NEXT: # %bb.0: 1587; CHECK-NEXT: local.get 0 1588; CHECK-NEXT: i32.const 16 1589; CHECK-NEXT: i32.add 1590; CHECK-NEXT: i32x4.load16x4_u 0 1591; CHECK-NEXT: # fallthrough-return 1592 %q = ptrtoint ptr %p to i32 1593 %r = add nsw i32 %q, 16 1594 %s = inttoptr i32 %r to ptr 1595 %v = load <4 x i16>, ptr %s 1596 %v2 = zext <4 x i16> %v to <4 x i32> 1597 ret <4 x i32> %v2 1598} 1599 1600define <4 x i32> @load_sext_v4i8_to_v4i32_with_unfolded_offset(ptr %p) { 1601; CHECK-LABEL: load_sext_v4i8_to_v4i32_with_unfolded_offset: 1602; CHECK: .functype load_sext_v4i8_to_v4i32_with_unfolded_offset (i32) -> (v128) 1603; CHECK-NEXT: # %bb.0: 1604; CHECK-NEXT: local.get 0 1605; CHECK-NEXT: i32.const 16 1606; CHECK-NEXT: i32.add 1607; CHECK-NEXT: v128.load32_zero 0 1608; CHECK-NEXT: i16x8.extend_low_i8x16_s 1609; CHECK-NEXT: i32x4.extend_low_i16x8_s 1610; CHECK-NEXT: # fallthrough-return 1611 %q = ptrtoint ptr %p to i32 1612 %r = add nsw i32 %q, 16 1613 %s = inttoptr i32 %r to ptr 1614 %v = load <4 x i8>, ptr %s 1615 %v2 = sext <4 x i8> %v to <4 x i32> 1616 ret <4 x i32> %v2 1617} 1618 1619define <4 x i32> @load_zext_v4i8_to_v4i32_with_unfolded_offset(ptr %p) { 1620; CHECK-LABEL: load_zext_v4i8_to_v4i32_with_unfolded_offset: 1621; CHECK: .functype load_zext_v4i8_to_v4i32_with_unfolded_offset (i32) -> (v128) 1622; CHECK-NEXT: # %bb.0: 1623; CHECK-NEXT: local.get 0 1624; CHECK-NEXT: i32.const 16 1625; CHECK-NEXT: i32.add 1626; CHECK-NEXT: v128.load32_zero 0 1627; CHECK-NEXT: i16x8.extend_low_i8x16_u 1628; CHECK-NEXT: i32x4.extend_low_i16x8_u 1629; CHECK-NEXT: # fallthrough-return 1630 %q = ptrtoint ptr %p to i32 1631 %r = add nsw i32 %q, 16 1632 %s = inttoptr i32 %r to ptr 1633 %v = load <4 x i8>, ptr %s 1634 %v2 = zext <4 x i8> %v to <4 x i32> 1635 ret <4 x i32> %v2 1636} 1637 1638define <4 x i16> @load_ext_v4i32_with_unfolded_offset(ptr %p) { 1639; CHECK-LABEL: load_ext_v4i32_with_unfolded_offset: 1640; 
CHECK: .functype load_ext_v4i32_with_unfolded_offset (i32) -> (v128) 1641; CHECK-NEXT: # %bb.0: 1642; CHECK-NEXT: local.get 0 1643; CHECK-NEXT: i32.const 16 1644; CHECK-NEXT: i32.add 1645; CHECK-NEXT: v128.load64_zero 0 1646; CHECK-NEXT: # fallthrough-return 1647 %q = ptrtoint ptr %p to i32 1648 %r = add nsw i32 %q, 16 1649 %s = inttoptr i32 %r to ptr 1650 %v = load <4 x i16>, ptr %s 1651 ret <4 x i16> %v 1652} 1653 1654define <4 x i32> @load_v4i32_with_unfolded_gep_offset(ptr %p) { 1655; CHECK-LABEL: load_v4i32_with_unfolded_gep_offset: 1656; CHECK: .functype load_v4i32_with_unfolded_gep_offset (i32) -> (v128) 1657; CHECK-NEXT: # %bb.0: 1658; CHECK-NEXT: local.get 0 1659; CHECK-NEXT: i32.const 16 1660; CHECK-NEXT: i32.add 1661; CHECK-NEXT: v128.load 0 1662; CHECK-NEXT: # fallthrough-return 1663 %s = getelementptr <4 x i32>, ptr %p, i32 1 1664 %v = load <4 x i32>, ptr %s 1665 ret <4 x i32> %v 1666} 1667 1668define <4 x i32> @load_splat_v4i32_with_unfolded_gep_offset(ptr %p) { 1669; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_offset: 1670; CHECK: .functype load_splat_v4i32_with_unfolded_gep_offset (i32) -> (v128) 1671; CHECK-NEXT: # %bb.0: 1672; CHECK-NEXT: local.get 0 1673; CHECK-NEXT: i32.const 4 1674; CHECK-NEXT: i32.add 1675; CHECK-NEXT: v128.load32_splat 0 1676; CHECK-NEXT: # fallthrough-return 1677 %s = getelementptr i32, ptr %p, i32 1 1678 %e = load i32, ptr %s 1679 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1680 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1681 ret <4 x i32> %v2 1682} 1683 1684define <4 x i32> @load_sext_v4i16_to_v4i32_with_unfolded_gep_offset(ptr %p) { 1685; CHECK-LABEL: load_sext_v4i16_to_v4i32_with_unfolded_gep_offset: 1686; CHECK: .functype load_sext_v4i16_to_v4i32_with_unfolded_gep_offset (i32) -> (v128) 1687; CHECK-NEXT: # %bb.0: 1688; CHECK-NEXT: local.get 0 1689; CHECK-NEXT: i32.const 8 1690; CHECK-NEXT: i32.add 1691; CHECK-NEXT: i32x4.load16x4_s 0 1692; CHECK-NEXT: # fallthrough-return 
1693 %s = getelementptr <4 x i16>, ptr %p, i32 1 1694 %v = load <4 x i16>, ptr %s 1695 %v2 = sext <4 x i16> %v to <4 x i32> 1696 ret <4 x i32> %v2 1697} 1698 1699define <4 x i32> @load_zext_v4i16_to_v4i32_with_unfolded_gep_offset(ptr %p) { 1700; CHECK-LABEL: load_zext_v4i16_to_v4i32_with_unfolded_gep_offset: 1701; CHECK: .functype load_zext_v4i16_to_v4i32_with_unfolded_gep_offset (i32) -> (v128) 1702; CHECK-NEXT: # %bb.0: 1703; CHECK-NEXT: local.get 0 1704; CHECK-NEXT: i32.const 8 1705; CHECK-NEXT: i32.add 1706; CHECK-NEXT: i32x4.load16x4_u 0 1707; CHECK-NEXT: # fallthrough-return 1708 %s = getelementptr <4 x i16>, ptr %p, i32 1 1709 %v = load <4 x i16>, ptr %s 1710 %v2 = zext <4 x i16> %v to <4 x i32> 1711 ret <4 x i32> %v2 1712} 1713 1714define <4 x i32> @load_sext_v4i8_to_v4i32_with_unfolded_gep_offset(ptr %p) { 1715; CHECK-LABEL: load_sext_v4i8_to_v4i32_with_unfolded_gep_offset: 1716; CHECK: .functype load_sext_v4i8_to_v4i32_with_unfolded_gep_offset (i32) -> (v128) 1717; CHECK-NEXT: # %bb.0: 1718; CHECK-NEXT: local.get 0 1719; CHECK-NEXT: i32.const 4 1720; CHECK-NEXT: i32.add 1721; CHECK-NEXT: v128.load32_zero 0 1722; CHECK-NEXT: i16x8.extend_low_i8x16_s 1723; CHECK-NEXT: i32x4.extend_low_i16x8_s 1724; CHECK-NEXT: # fallthrough-return 1725 %s = getelementptr <4 x i8>, ptr %p, i32 1 1726 %v = load <4 x i8>, ptr %s 1727 %v2 = sext <4 x i8> %v to <4 x i32> 1728 ret <4 x i32> %v2 1729} 1730 1731define <4 x i32> @load_zext_v4i8_to_v4i32_with_unfolded_gep_offset(ptr %p) { 1732; CHECK-LABEL: load_zext_v4i8_to_v4i32_with_unfolded_gep_offset: 1733; CHECK: .functype load_zext_v4i8_to_v4i32_with_unfolded_gep_offset (i32) -> (v128) 1734; CHECK-NEXT: # %bb.0: 1735; CHECK-NEXT: local.get 0 1736; CHECK-NEXT: i32.const 4 1737; CHECK-NEXT: i32.add 1738; CHECK-NEXT: v128.load32_zero 0 1739; CHECK-NEXT: i16x8.extend_low_i8x16_u 1740; CHECK-NEXT: i32x4.extend_low_i16x8_u 1741; CHECK-NEXT: # fallthrough-return 1742 %s = getelementptr <4 x i8>, ptr %p, i32 1 1743 %v = load <4 x i8>, 
ptr %s 1744 %v2 = zext <4 x i8> %v to <4 x i32> 1745 ret <4 x i32> %v2 1746} 1747 1748define <4 x i16> @load_ext_v4i32_with_unfolded_gep_offset(ptr %p) { 1749; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_offset: 1750; CHECK: .functype load_ext_v4i32_with_unfolded_gep_offset (i32) -> (v128) 1751; CHECK-NEXT: # %bb.0: 1752; CHECK-NEXT: local.get 0 1753; CHECK-NEXT: i32.const 8 1754; CHECK-NEXT: i32.add 1755; CHECK-NEXT: v128.load64_zero 0 1756; CHECK-NEXT: # fallthrough-return 1757 %s = getelementptr <4 x i16>, ptr %p, i32 1 1758 %v = load <4 x i16>, ptr %s 1759 ret <4 x i16> %v 1760} 1761 1762define <4 x i32> @load_v4i32_from_numeric_address() { 1763; CHECK-LABEL: load_v4i32_from_numeric_address: 1764; CHECK: .functype load_v4i32_from_numeric_address () -> (v128) 1765; CHECK-NEXT: # %bb.0: 1766; CHECK-NEXT: i32.const 0 1767; CHECK-NEXT: v128.load 32 1768; CHECK-NEXT: # fallthrough-return 1769 %s = inttoptr i32 32 to ptr 1770 %v = load <4 x i32>, ptr %s 1771 ret <4 x i32> %v 1772} 1773 1774define <4 x i32> @load_splat_v4i32_from_numeric_address() { 1775; CHECK-LABEL: load_splat_v4i32_from_numeric_address: 1776; CHECK: .functype load_splat_v4i32_from_numeric_address () -> (v128) 1777; CHECK-NEXT: # %bb.0: 1778; CHECK-NEXT: i32.const 0 1779; CHECK-NEXT: v128.load32_splat 32 1780; CHECK-NEXT: # fallthrough-return 1781 %s = inttoptr i32 32 to ptr 1782 %e = load i32, ptr %s 1783 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1784 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1785 ret <4 x i32> %v2 1786} 1787 1788define <4 x i32> @load_sext_v4i16_to_v4i32_from_numeric_address() { 1789; CHECK-LABEL: load_sext_v4i16_to_v4i32_from_numeric_address: 1790; CHECK: .functype load_sext_v4i16_to_v4i32_from_numeric_address () -> (v128) 1791; CHECK-NEXT: # %bb.0: 1792; CHECK-NEXT: i32.const 0 1793; CHECK-NEXT: i32x4.load16x4_s 32 1794; CHECK-NEXT: # fallthrough-return 1795 %s = inttoptr i32 32 to ptr 1796 %v = load <4 x i16>, ptr %s 1797 %v2 = 
sext <4 x i16> %v to <4 x i32> 1798 ret <4 x i32> %v2 1799} 1800 1801define <4 x i32> @load_zext_v4i16_to_v4i32_from_numeric_address() { 1802; CHECK-LABEL: load_zext_v4i16_to_v4i32_from_numeric_address: 1803; CHECK: .functype load_zext_v4i16_to_v4i32_from_numeric_address () -> (v128) 1804; CHECK-NEXT: # %bb.0: 1805; CHECK-NEXT: i32.const 0 1806; CHECK-NEXT: i32x4.load16x4_u 32 1807; CHECK-NEXT: # fallthrough-return 1808 %s = inttoptr i32 32 to ptr 1809 %v = load <4 x i16>, ptr %s 1810 %v2 = zext <4 x i16> %v to <4 x i32> 1811 ret <4 x i32> %v2 1812} 1813 1814define <4 x i32> @load_sext_v4i8_to_v4i32_from_numeric_address() { 1815; CHECK-LABEL: load_sext_v4i8_to_v4i32_from_numeric_address: 1816; CHECK: .functype load_sext_v4i8_to_v4i32_from_numeric_address () -> (v128) 1817; CHECK-NEXT: # %bb.0: 1818; CHECK-NEXT: i32.const 0 1819; CHECK-NEXT: v128.load32_zero 32 1820; CHECK-NEXT: i16x8.extend_low_i8x16_s 1821; CHECK-NEXT: i32x4.extend_low_i16x8_s 1822; CHECK-NEXT: # fallthrough-return 1823 %s = inttoptr i32 32 to ptr 1824 %v = load <4 x i8>, ptr %s 1825 %v2 = sext <4 x i8> %v to <4 x i32> 1826 ret <4 x i32> %v2 1827} 1828 1829define <4 x i32> @load_zext_v4i8_to_v4i32_from_numeric_address() { 1830; CHECK-LABEL: load_zext_v4i8_to_v4i32_from_numeric_address: 1831; CHECK: .functype load_zext_v4i8_to_v4i32_from_numeric_address () -> (v128) 1832; CHECK-NEXT: # %bb.0: 1833; CHECK-NEXT: i32.const 0 1834; CHECK-NEXT: v128.load32_zero 32 1835; CHECK-NEXT: i16x8.extend_low_i8x16_u 1836; CHECK-NEXT: i32x4.extend_low_i16x8_u 1837; CHECK-NEXT: # fallthrough-return 1838 %s = inttoptr i32 32 to ptr 1839 %v = load <4 x i8>, ptr %s 1840 %v2 = zext <4 x i8> %v to <4 x i32> 1841 ret <4 x i32> %v2 1842} 1843 1844define <4 x i16> @load_ext_v4i32_from_numeric_address() { 1845; CHECK-LABEL: load_ext_v4i32_from_numeric_address: 1846; CHECK: .functype load_ext_v4i32_from_numeric_address () -> (v128) 1847; CHECK-NEXT: # %bb.0: 1848; CHECK-NEXT: i32.const 0 1849; CHECK-NEXT: v128.load64_zero 32 
1850; CHECK-NEXT: # fallthrough-return 1851 %s = inttoptr i32 32 to ptr 1852 %v = load <4 x i16>, ptr %s 1853 ret <4 x i16> %v 1854} 1855 1856@gv_v4i32 = global <4 x i32> <i32 42, i32 42, i32 42, i32 42> 1857define <4 x i32> @load_v4i32_from_global_address() { 1858; CHECK-LABEL: load_v4i32_from_global_address: 1859; CHECK: .functype load_v4i32_from_global_address () -> (v128) 1860; CHECK-NEXT: # %bb.0: 1861; CHECK-NEXT: i32.const 0 1862; CHECK-NEXT: v128.load gv_v4i32 1863; CHECK-NEXT: # fallthrough-return 1864 %v = load <4 x i32>, ptr @gv_v4i32 1865 ret <4 x i32> %v 1866} 1867 1868@gv_i32 = global i32 42 1869define <4 x i32> @load_splat_v4i32_from_global_address() { 1870; CHECK-LABEL: load_splat_v4i32_from_global_address: 1871; CHECK: .functype load_splat_v4i32_from_global_address () -> (v128) 1872; CHECK-NEXT: # %bb.0: 1873; CHECK-NEXT: i32.const 0 1874; CHECK-NEXT: v128.load32_splat gv_i32 1875; CHECK-NEXT: # fallthrough-return 1876 %e = load i32, ptr @gv_i32 1877 %v1 = insertelement <4 x i32> undef, i32 %e, i32 0 1878 %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer 1879 ret <4 x i32> %v2 1880} 1881 1882@gv_v4i16 = global <4 x i16> <i16 42, i16 42, i16 42, i16 42> 1883define <4 x i32> @load_sext_v4i16_to_v4i32_from_global_address() { 1884; CHECK-LABEL: load_sext_v4i16_to_v4i32_from_global_address: 1885; CHECK: .functype load_sext_v4i16_to_v4i32_from_global_address () -> (v128) 1886; CHECK-NEXT: # %bb.0: 1887; CHECK-NEXT: i32.const 0 1888; CHECK-NEXT: i32x4.load16x4_s gv_v4i16 1889; CHECK-NEXT: # fallthrough-return 1890 %v = load <4 x i16>, ptr @gv_v4i16 1891 %v2 = sext <4 x i16> %v to <4 x i32> 1892 ret <4 x i32> %v2 1893} 1894 1895define <4 x i32> @load_zext_v4i16_to_v4i32_from_global_address() { 1896; CHECK-LABEL: load_zext_v4i16_to_v4i32_from_global_address: 1897; CHECK: .functype load_zext_v4i16_to_v4i32_from_global_address () -> (v128) 1898; CHECK-NEXT: # %bb.0: 1899; CHECK-NEXT: i32.const 0 1900; CHECK-NEXT: i32x4.load16x4_u 
gv_v4i16 1901; CHECK-NEXT: # fallthrough-return 1902 %v = load <4 x i16>, ptr @gv_v4i16 1903 %v2 = zext <4 x i16> %v to <4 x i32> 1904 ret <4 x i32> %v2 1905} 1906 1907@gv_v4i8 = global <4 x i8> <i8 42, i8 42, i8 42, i8 42> 1908define <4 x i32> @load_sext_v4i8_to_v4i32_from_global_address() { 1909; CHECK-LABEL: load_sext_v4i8_to_v4i32_from_global_address: 1910; CHECK: .functype load_sext_v4i8_to_v4i32_from_global_address () -> (v128) 1911; CHECK-NEXT: # %bb.0: 1912; CHECK-NEXT: i32.const 0 1913; CHECK-NEXT: v128.load32_zero gv_v4i8 1914; CHECK-NEXT: i16x8.extend_low_i8x16_s 1915; CHECK-NEXT: i32x4.extend_low_i16x8_s 1916; CHECK-NEXT: # fallthrough-return 1917 %v = load <4 x i8>, ptr @gv_v4i8 1918 %v2 = sext <4 x i8> %v to <4 x i32> 1919 ret <4 x i32> %v2 1920} 1921 1922define <4 x i32> @load_zext_v4i8_to_v4i32_from_global_address() { 1923; CHECK-LABEL: load_zext_v4i8_to_v4i32_from_global_address: 1924; CHECK: .functype load_zext_v4i8_to_v4i32_from_global_address () -> (v128) 1925; CHECK-NEXT: # %bb.0: 1926; CHECK-NEXT: i32.const 0 1927; CHECK-NEXT: v128.load32_zero gv_v4i8 1928; CHECK-NEXT: i16x8.extend_low_i8x16_u 1929; CHECK-NEXT: i32x4.extend_low_i16x8_u 1930; CHECK-NEXT: # fallthrough-return 1931 %v = load <4 x i8>, ptr @gv_v4i8 1932 %v2 = zext <4 x i8> %v to <4 x i32> 1933 ret <4 x i32> %v2 1934} 1935 1936define <4 x i16> @load_ext_v4i32_from_global_address() { 1937; CHECK-LABEL: load_ext_v4i32_from_global_address: 1938; CHECK: .functype load_ext_v4i32_from_global_address () -> (v128) 1939; CHECK-NEXT: # %bb.0: 1940; CHECK-NEXT: i32.const 0 1941; CHECK-NEXT: v128.load64_zero gv_v4i16 1942; CHECK-NEXT: # fallthrough-return 1943 %v = load <4 x i16>, ptr @gv_v4i16 1944 ret <4 x i16> %v 1945} 1946 1947define void @store_v4i32(<4 x i32> %v, ptr %p) { 1948; CHECK-LABEL: store_v4i32: 1949; CHECK: .functype store_v4i32 (v128, i32) -> () 1950; CHECK-NEXT: # %bb.0: 1951; CHECK-NEXT: local.get 1 1952; CHECK-NEXT: local.get 0 1953; CHECK-NEXT: v128.store 0 1954; 
CHECK-NEXT: # fallthrough-return 1955 store <4 x i32> %v , ptr %p 1956 ret void 1957} 1958 1959define void @store_narrowing_v4i32(<4 x i16> %v, ptr %p) { 1960; CHECK-LABEL: store_narrowing_v4i32: 1961; CHECK: .functype store_narrowing_v4i32 (v128, i32) -> () 1962; CHECK-NEXT: # %bb.0: 1963; CHECK-NEXT: local.get 1 1964; CHECK-NEXT: local.get 0 1965; CHECK-NEXT: v128.store64_lane 0, 0 1966; CHECK-NEXT: # fallthrough-return 1967 store <4 x i16> %v , ptr %p 1968 ret void 1969} 1970 1971define void @store_v4i32_with_folded_offset(<4 x i32> %v, ptr %p) { 1972; CHECK-LABEL: store_v4i32_with_folded_offset: 1973; CHECK: .functype store_v4i32_with_folded_offset (v128, i32) -> () 1974; CHECK-NEXT: # %bb.0: 1975; CHECK-NEXT: local.get 1 1976; CHECK-NEXT: local.get 0 1977; CHECK-NEXT: v128.store 16 1978; CHECK-NEXT: # fallthrough-return 1979 %q = ptrtoint ptr %p to i32 1980 %r = add nuw i32 %q, 16 1981 %s = inttoptr i32 %r to ptr 1982 store <4 x i32> %v , ptr %s 1983 ret void 1984} 1985 1986define void @store_narrowing_v4i32_with_folded_offset(<4 x i16> %v, ptr %p) { 1987; CHECK-LABEL: store_narrowing_v4i32_with_folded_offset: 1988; CHECK: .functype store_narrowing_v4i32_with_folded_offset (v128, i32) -> () 1989; CHECK-NEXT: # %bb.0: 1990; CHECK-NEXT: local.get 1 1991; CHECK-NEXT: local.get 0 1992; CHECK-NEXT: v128.store64_lane 16, 0 1993; CHECK-NEXT: # fallthrough-return 1994 %q = ptrtoint ptr %p to i32 1995 %r = add nuw i32 %q, 16 1996 %s = inttoptr i32 %r to ptr 1997 store <4 x i16> %v , ptr %s 1998 ret void 1999} 2000 2001define void @store_v4i32_with_folded_gep_offset(<4 x i32> %v, ptr %p) { 2002; CHECK-LABEL: store_v4i32_with_folded_gep_offset: 2003; CHECK: .functype store_v4i32_with_folded_gep_offset (v128, i32) -> () 2004; CHECK-NEXT: # %bb.0: 2005; CHECK-NEXT: local.get 1 2006; CHECK-NEXT: local.get 0 2007; CHECK-NEXT: v128.store 16 2008; CHECK-NEXT: # fallthrough-return 2009 %s = getelementptr inbounds <4 x i32>, ptr %p, i32 1 2010 store <4 x i32> %v , ptr %s 2011 
ret void 2012} 2013 2014define void @store_narrowing_v4i32_with_folded_gep_offset(<4 x i16> %v, ptr %p) { 2015; CHECK-LABEL: store_narrowing_v4i32_with_folded_gep_offset: 2016; CHECK: .functype store_narrowing_v4i32_with_folded_gep_offset (v128, i32) -> () 2017; CHECK-NEXT: # %bb.0: 2018; CHECK-NEXT: local.get 1 2019; CHECK-NEXT: local.get 0 2020; CHECK-NEXT: v128.store64_lane 8, 0 2021; CHECK-NEXT: # fallthrough-return 2022 %s = getelementptr inbounds <4 x i16>, ptr %p, i32 1 2023 store <4 x i16> %v , ptr %s 2024 ret void 2025} 2026 2027define void @store_v4i32_with_unfolded_gep_negative_offset(<4 x i32> %v, ptr %p) { 2028; CHECK-LABEL: store_v4i32_with_unfolded_gep_negative_offset: 2029; CHECK: .functype store_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> () 2030; CHECK-NEXT: # %bb.0: 2031; CHECK-NEXT: local.get 1 2032; CHECK-NEXT: i32.const -16 2033; CHECK-NEXT: i32.add 2034; CHECK-NEXT: local.get 0 2035; CHECK-NEXT: v128.store 0 2036; CHECK-NEXT: # fallthrough-return 2037 %s = getelementptr inbounds <4 x i32>, ptr %p, i32 -1 2038 store <4 x i32> %v , ptr %s 2039 ret void 2040} 2041 2042define void @store_narrowing_v4i32_with_unfolded_gep_negative_offset(<4 x i16> %v, ptr %p) { 2043; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_gep_negative_offset: 2044; CHECK: .functype store_narrowing_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> () 2045; CHECK-NEXT: # %bb.0: 2046; CHECK-NEXT: local.get 1 2047; CHECK-NEXT: i32.const -8 2048; CHECK-NEXT: i32.add 2049; CHECK-NEXT: local.get 0 2050; CHECK-NEXT: v128.store64_lane 0, 0 2051; CHECK-NEXT: # fallthrough-return 2052 %s = getelementptr inbounds <4 x i16>, ptr %p, i32 -1 2053 store <4 x i16> %v , ptr %s 2054 ret void 2055} 2056 2057define void @store_v4i32_with_unfolded_offset(<4 x i32> %v, ptr %p) { 2058; CHECK-LABEL: store_v4i32_with_unfolded_offset: 2059; CHECK: .functype store_v4i32_with_unfolded_offset (v128, i32) -> () 2060; CHECK-NEXT: # %bb.0: 2061; CHECK-NEXT: local.get 1 2062; CHECK-NEXT: 
i32.const 16 2063; CHECK-NEXT: i32.add 2064; CHECK-NEXT: local.get 0 2065; CHECK-NEXT: v128.store 0 2066; CHECK-NEXT: # fallthrough-return 2067 %q = ptrtoint ptr %p to i32 2068 %r = add nsw i32 %q, 16 2069 %s = inttoptr i32 %r to ptr 2070 store <4 x i32> %v , ptr %s 2071 ret void 2072} 2073 2074define void @store_narrowing_v4i32_with_unfolded_offset(<4 x i16> %v, ptr %p) { 2075; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_offset: 2076; CHECK: .functype store_narrowing_v4i32_with_unfolded_offset (v128, i32) -> () 2077; CHECK-NEXT: # %bb.0: 2078; CHECK-NEXT: local.get 1 2079; CHECK-NEXT: i32.const 16 2080; CHECK-NEXT: i32.add 2081; CHECK-NEXT: local.get 0 2082; CHECK-NEXT: v128.store64_lane 0, 0 2083; CHECK-NEXT: # fallthrough-return 2084 %q = ptrtoint ptr %p to i32 2085 %r = add nsw i32 %q, 16 2086 %s = inttoptr i32 %r to ptr 2087 store <4 x i16> %v , ptr %s 2088 ret void 2089} 2090 2091define void @store_v4i32_with_unfolded_gep_offset(<4 x i32> %v, ptr %p) { 2092; CHECK-LABEL: store_v4i32_with_unfolded_gep_offset: 2093; CHECK: .functype store_v4i32_with_unfolded_gep_offset (v128, i32) -> () 2094; CHECK-NEXT: # %bb.0: 2095; CHECK-NEXT: local.get 1 2096; CHECK-NEXT: i32.const 16 2097; CHECK-NEXT: i32.add 2098; CHECK-NEXT: local.get 0 2099; CHECK-NEXT: v128.store 0 2100; CHECK-NEXT: # fallthrough-return 2101 %s = getelementptr <4 x i32>, ptr %p, i32 1 2102 store <4 x i32> %v , ptr %s 2103 ret void 2104} 2105 2106define void @store_narrowing_v4i32_with_unfolded_gep_offset(<4 x i16> %v, ptr %p) { 2107; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_gep_offset: 2108; CHECK: .functype store_narrowing_v4i32_with_unfolded_gep_offset (v128, i32) -> () 2109; CHECK-NEXT: # %bb.0: 2110; CHECK-NEXT: local.get 1 2111; CHECK-NEXT: i32.const 8 2112; CHECK-NEXT: i32.add 2113; CHECK-NEXT: local.get 0 2114; CHECK-NEXT: v128.store64_lane 0, 0 2115; CHECK-NEXT: # fallthrough-return 2116 %s = getelementptr <4 x i16>, ptr %p, i32 1 2117 store <4 x i16> %v , ptr %s 2118 ret void 
2119} 2120 2121define void @store_v4i32_to_numeric_address(<4 x i32> %v) { 2122; CHECK-LABEL: store_v4i32_to_numeric_address: 2123; CHECK: .functype store_v4i32_to_numeric_address (v128) -> () 2124; CHECK-NEXT: # %bb.0: 2125; CHECK-NEXT: i32.const 0 2126; CHECK-NEXT: local.get 0 2127; CHECK-NEXT: v128.store 32 2128; CHECK-NEXT: # fallthrough-return 2129 %s = inttoptr i32 32 to ptr 2130 store <4 x i32> %v , ptr %s 2131 ret void 2132} 2133 2134define void @store_narrowing_v4i32_to_numeric_address(<4 x i16> %v) { 2135; CHECK-LABEL: store_narrowing_v4i32_to_numeric_address: 2136; CHECK: .functype store_narrowing_v4i32_to_numeric_address (v128) -> () 2137; CHECK-NEXT: # %bb.0: 2138; CHECK-NEXT: i32.const 0 2139; CHECK-NEXT: local.get 0 2140; CHECK-NEXT: v128.store64_lane 32, 0 2141; CHECK-NEXT: # fallthrough-return 2142 %s = inttoptr i32 32 to ptr 2143 store <4 x i16> %v , ptr %s 2144 ret void 2145} 2146 2147define void @store_v4i32_to_global_address(<4 x i32> %v) { 2148; CHECK-LABEL: store_v4i32_to_global_address: 2149; CHECK: .functype store_v4i32_to_global_address (v128) -> () 2150; CHECK-NEXT: # %bb.0: 2151; CHECK-NEXT: i32.const 0 2152; CHECK-NEXT: local.get 0 2153; CHECK-NEXT: v128.store gv_v4i32 2154; CHECK-NEXT: # fallthrough-return 2155 store <4 x i32> %v , ptr @gv_v4i32 2156 ret void 2157} 2158 2159define void @store_narrowing_v4i32_to_global_address(<4 x i16> %v) { 2160; CHECK-LABEL: store_narrowing_v4i32_to_global_address: 2161; CHECK: .functype store_narrowing_v4i32_to_global_address (v128) -> () 2162; CHECK-NEXT: # %bb.0: 2163; CHECK-NEXT: i32.const 0 2164; CHECK-NEXT: local.get 0 2165; CHECK-NEXT: v128.store64_lane gv_v4i16, 0 2166; CHECK-NEXT: # fallthrough-return 2167 store <4 x i16> %v , ptr @gv_v4i16 2168 ret void 2169} 2170 2171; ============================================================================== 2172; 2 x i64 2173; ============================================================================== 2174define <2 x i64> @load_v2i64(ptr %p) { 
2175; CHECK-LABEL: load_v2i64: 2176; CHECK: .functype load_v2i64 (i32) -> (v128) 2177; CHECK-NEXT: # %bb.0: 2178; CHECK-NEXT: local.get 0 2179; CHECK-NEXT: v128.load 0 2180; CHECK-NEXT: # fallthrough-return 2181 %v = load <2 x i64>, ptr %p 2182 ret <2 x i64> %v 2183} 2184 2185define <2 x i64> @load_splat_v2i64(ptr %p) { 2186; CHECK-LABEL: load_splat_v2i64: 2187; CHECK: .functype load_splat_v2i64 (i32) -> (v128) 2188; CHECK-NEXT: # %bb.0: 2189; CHECK-NEXT: local.get 0 2190; CHECK-NEXT: v128.load64_splat 0 2191; CHECK-NEXT: # fallthrough-return 2192 %e = load i64, ptr %p 2193 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2194 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2195 ret <2 x i64> %v2 2196} 2197 2198define <2 x i64> @load_sext_v2i64(ptr %p) { 2199; CHECK-LABEL: load_sext_v2i64: 2200; CHECK: .functype load_sext_v2i64 (i32) -> (v128) 2201; CHECK-NEXT: # %bb.0: 2202; CHECK-NEXT: local.get 0 2203; CHECK-NEXT: i64x2.load32x2_s 0 2204; CHECK-NEXT: # fallthrough-return 2205 %v = load <2 x i32>, ptr %p 2206 %v2 = sext <2 x i32> %v to <2 x i64> 2207 ret <2 x i64> %v2 2208} 2209 2210define <2 x i64> @load_zext_v2i64(ptr %p) { 2211; CHECK-LABEL: load_zext_v2i64: 2212; CHECK: .functype load_zext_v2i64 (i32) -> (v128) 2213; CHECK-NEXT: # %bb.0: 2214; CHECK-NEXT: local.get 0 2215; CHECK-NEXT: i64x2.load32x2_u 0 2216; CHECK-NEXT: # fallthrough-return 2217 %v = load <2 x i32>, ptr %p 2218 %v2 = zext <2 x i32> %v to <2 x i64> 2219 ret <2 x i64> %v2 2220} 2221 2222define <2 x i32> @load_ext_v2i64(ptr %p) { 2223; CHECK-LABEL: load_ext_v2i64: 2224; CHECK: .functype load_ext_v2i64 (i32) -> (v128) 2225; CHECK-NEXT: # %bb.0: 2226; CHECK-NEXT: local.get 0 2227; CHECK-NEXT: v128.load64_zero 0 2228; CHECK-NEXT: # fallthrough-return 2229 %v = load <2 x i32>, ptr %p 2230 ret <2 x i32> %v 2231} 2232 2233define <2 x i64> @load_v2i64_with_folded_offset(ptr %p) { 2234; CHECK-LABEL: load_v2i64_with_folded_offset: 2235; CHECK: .functype 
load_v2i64_with_folded_offset (i32) -> (v128) 2236; CHECK-NEXT: # %bb.0: 2237; CHECK-NEXT: local.get 0 2238; CHECK-NEXT: v128.load 16 2239; CHECK-NEXT: # fallthrough-return 2240 %q = ptrtoint ptr %p to i32 2241 %r = add nuw i32 %q, 16 2242 %s = inttoptr i32 %r to ptr 2243 %v = load <2 x i64>, ptr %s 2244 ret <2 x i64> %v 2245} 2246 2247define <2 x i64> @load_splat_v2i64_with_folded_offset(ptr %p) { 2248; CHECK-LABEL: load_splat_v2i64_with_folded_offset: 2249; CHECK: .functype load_splat_v2i64_with_folded_offset (i32) -> (v128) 2250; CHECK-NEXT: # %bb.0: 2251; CHECK-NEXT: local.get 0 2252; CHECK-NEXT: v128.load64_splat 16 2253; CHECK-NEXT: # fallthrough-return 2254 %q = ptrtoint ptr %p to i32 2255 %r = add nuw i32 %q, 16 2256 %s = inttoptr i32 %r to ptr 2257 %e = load i64, ptr %s 2258 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2259 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2260 ret <2 x i64> %v2 2261} 2262 2263define <2 x i64> @load_sext_v2i64_with_folded_offset(ptr %p) { 2264; CHECK-LABEL: load_sext_v2i64_with_folded_offset: 2265; CHECK: .functype load_sext_v2i64_with_folded_offset (i32) -> (v128) 2266; CHECK-NEXT: # %bb.0: 2267; CHECK-NEXT: local.get 0 2268; CHECK-NEXT: i64x2.load32x2_s 16 2269; CHECK-NEXT: # fallthrough-return 2270 %q = ptrtoint ptr %p to i32 2271 %r = add nuw i32 %q, 16 2272 %s = inttoptr i32 %r to ptr 2273 %v = load <2 x i32>, ptr %s 2274 %v2 = sext <2 x i32> %v to <2 x i64> 2275 ret <2 x i64> %v2 2276} 2277 2278define <2 x i64> @load_zext_v2i64_with_folded_offset(ptr %p) { 2279; CHECK-LABEL: load_zext_v2i64_with_folded_offset: 2280; CHECK: .functype load_zext_v2i64_with_folded_offset (i32) -> (v128) 2281; CHECK-NEXT: # %bb.0: 2282; CHECK-NEXT: local.get 0 2283; CHECK-NEXT: i64x2.load32x2_u 16 2284; CHECK-NEXT: # fallthrough-return 2285 %q = ptrtoint ptr %p to i32 2286 %r = add nuw i32 %q, 16 2287 %s = inttoptr i32 %r to ptr 2288 %v = load <2 x i32>, ptr %s 2289 %v2 = zext <2 x i32> %v to <2 x i64> 
2290 ret <2 x i64> %v2 2291} 2292 2293define <2 x i32> @load_ext_v2i64_with_folded_offset(ptr %p) { 2294; CHECK-LABEL: load_ext_v2i64_with_folded_offset: 2295; CHECK: .functype load_ext_v2i64_with_folded_offset (i32) -> (v128) 2296; CHECK-NEXT: # %bb.0: 2297; CHECK-NEXT: local.get 0 2298; CHECK-NEXT: v128.load64_zero 16 2299; CHECK-NEXT: # fallthrough-return 2300 %q = ptrtoint ptr %p to i32 2301 %r = add nuw i32 %q, 16 2302 %s = inttoptr i32 %r to ptr 2303 %v = load <2 x i32>, ptr %s 2304 ret <2 x i32> %v 2305} 2306 2307define <2 x i64> @load_v2i64_with_folded_gep_offset(ptr %p) { 2308; CHECK-LABEL: load_v2i64_with_folded_gep_offset: 2309; CHECK: .functype load_v2i64_with_folded_gep_offset (i32) -> (v128) 2310; CHECK-NEXT: # %bb.0: 2311; CHECK-NEXT: local.get 0 2312; CHECK-NEXT: v128.load 16 2313; CHECK-NEXT: # fallthrough-return 2314 %s = getelementptr inbounds <2 x i64>, ptr %p, i32 1 2315 %v = load <2 x i64>, ptr %s 2316 ret <2 x i64> %v 2317} 2318 2319define <2 x i64> @load_splat_v2i64_with_folded_gep_offset(ptr %p) { 2320; CHECK-LABEL: load_splat_v2i64_with_folded_gep_offset: 2321; CHECK: .functype load_splat_v2i64_with_folded_gep_offset (i32) -> (v128) 2322; CHECK-NEXT: # %bb.0: 2323; CHECK-NEXT: local.get 0 2324; CHECK-NEXT: v128.load64_splat 8 2325; CHECK-NEXT: # fallthrough-return 2326 %s = getelementptr inbounds i64, ptr %p, i32 1 2327 %e = load i64, ptr %s 2328 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2329 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2330 ret <2 x i64> %v2 2331} 2332 2333define <2 x i64> @load_sext_v2i64_with_folded_gep_offset(ptr %p) { 2334; CHECK-LABEL: load_sext_v2i64_with_folded_gep_offset: 2335; CHECK: .functype load_sext_v2i64_with_folded_gep_offset (i32) -> (v128) 2336; CHECK-NEXT: # %bb.0: 2337; CHECK-NEXT: local.get 0 2338; CHECK-NEXT: i64x2.load32x2_s 8 2339; CHECK-NEXT: # fallthrough-return 2340 %s = getelementptr inbounds <2 x i32>, ptr %p, i32 1 2341 %v = load <2 x i32>, ptr %s 2342 
%v2 = sext <2 x i32> %v to <2 x i64> 2343 ret <2 x i64> %v2 2344} 2345 2346define <2 x i64> @load_zext_v2i64_with_folded_gep_offset(ptr %p) { 2347; CHECK-LABEL: load_zext_v2i64_with_folded_gep_offset: 2348; CHECK: .functype load_zext_v2i64_with_folded_gep_offset (i32) -> (v128) 2349; CHECK-NEXT: # %bb.0: 2350; CHECK-NEXT: local.get 0 2351; CHECK-NEXT: i64x2.load32x2_u 8 2352; CHECK-NEXT: # fallthrough-return 2353 %s = getelementptr inbounds <2 x i32>, ptr %p, i32 1 2354 %v = load <2 x i32>, ptr %s 2355 %v2 = zext <2 x i32> %v to <2 x i64> 2356 ret <2 x i64> %v2 2357} 2358 2359define <2 x i32> @load_ext_v2i64_with_folded_gep_offset(ptr %p) { 2360; CHECK-LABEL: load_ext_v2i64_with_folded_gep_offset: 2361; CHECK: .functype load_ext_v2i64_with_folded_gep_offset (i32) -> (v128) 2362; CHECK-NEXT: # %bb.0: 2363; CHECK-NEXT: local.get 0 2364; CHECK-NEXT: v128.load64_zero 8 2365; CHECK-NEXT: # fallthrough-return 2366 %s = getelementptr inbounds <2 x i32>, ptr %p, i32 1 2367 %v = load <2 x i32>, ptr %s 2368 ret <2 x i32> %v 2369} 2370 2371define <2 x i64> @load_v2i64_with_unfolded_gep_negative_offset(ptr %p) { 2372; CHECK-LABEL: load_v2i64_with_unfolded_gep_negative_offset: 2373; CHECK: .functype load_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) 2374; CHECK-NEXT: # %bb.0: 2375; CHECK-NEXT: local.get 0 2376; CHECK-NEXT: i32.const -16 2377; CHECK-NEXT: i32.add 2378; CHECK-NEXT: v128.load 0 2379; CHECK-NEXT: # fallthrough-return 2380 %s = getelementptr inbounds <2 x i64>, ptr %p, i32 -1 2381 %v = load <2 x i64>, ptr %s 2382 ret <2 x i64> %v 2383} 2384 2385define <2 x i64> @load_splat_v2i64_with_unfolded_gep_negative_offset(ptr %p) { 2386; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_negative_offset: 2387; CHECK: .functype load_splat_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) 2388; CHECK-NEXT: # %bb.0: 2389; CHECK-NEXT: local.get 0 2390; CHECK-NEXT: i32.const -8 2391; CHECK-NEXT: i32.add 2392; CHECK-NEXT: v128.load64_splat 0 2393; CHECK-NEXT: # 
fallthrough-return 2394 %s = getelementptr inbounds i64, ptr %p, i32 -1 2395 %e = load i64, ptr %s 2396 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2397 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2398 ret <2 x i64> %v2 2399} 2400 2401define <2 x i64> @load_sext_v2i64_with_unfolded_gep_negative_offset(ptr %p) { 2402; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_negative_offset: 2403; CHECK: .functype load_sext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) 2404; CHECK-NEXT: # %bb.0: 2405; CHECK-NEXT: local.get 0 2406; CHECK-NEXT: i32.const -8 2407; CHECK-NEXT: i32.add 2408; CHECK-NEXT: i64x2.load32x2_s 0 2409; CHECK-NEXT: # fallthrough-return 2410 %s = getelementptr inbounds <2 x i32>, ptr %p, i32 -1 2411 %v = load <2 x i32>, ptr %s 2412 %v2 = sext <2 x i32> %v to <2 x i64> 2413 ret <2 x i64> %v2 2414} 2415 2416define <2 x i64> @load_zext_v2i64_with_unfolded_gep_negative_offset(ptr %p) { 2417; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_negative_offset: 2418; CHECK: .functype load_zext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) 2419; CHECK-NEXT: # %bb.0: 2420; CHECK-NEXT: local.get 0 2421; CHECK-NEXT: i32.const -8 2422; CHECK-NEXT: i32.add 2423; CHECK-NEXT: i64x2.load32x2_u 0 2424; CHECK-NEXT: # fallthrough-return 2425 %s = getelementptr inbounds <2 x i32>, ptr %p, i32 -1 2426 %v = load <2 x i32>, ptr %s 2427 %v2 = zext <2 x i32> %v to <2 x i64> 2428 ret <2 x i64> %v2 2429} 2430 2431define <2 x i32> @load_ext_v2i64_with_unfolded_gep_negative_offset(ptr %p) { 2432; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_negative_offset: 2433; CHECK: .functype load_ext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128) 2434; CHECK-NEXT: # %bb.0: 2435; CHECK-NEXT: local.get 0 2436; CHECK-NEXT: i32.const -8 2437; CHECK-NEXT: i32.add 2438; CHECK-NEXT: v128.load64_zero 0 2439; CHECK-NEXT: # fallthrough-return 2440 %s = getelementptr inbounds <2 x i32>, ptr %p, i32 -1 2441 %v = load <2 x i32>, ptr %s 2442 ret 
<2 x i32> %v 2443} 2444 2445define <2 x i64> @load_v2i64_with_unfolded_offset(ptr %p) { 2446; CHECK-LABEL: load_v2i64_with_unfolded_offset: 2447; CHECK: .functype load_v2i64_with_unfolded_offset (i32) -> (v128) 2448; CHECK-NEXT: # %bb.0: 2449; CHECK-NEXT: local.get 0 2450; CHECK-NEXT: i32.const 16 2451; CHECK-NEXT: i32.add 2452; CHECK-NEXT: v128.load 0 2453; CHECK-NEXT: # fallthrough-return 2454 %q = ptrtoint ptr %p to i32 2455 %r = add nsw i32 %q, 16 2456 %s = inttoptr i32 %r to ptr 2457 %v = load <2 x i64>, ptr %s 2458 ret <2 x i64> %v 2459} 2460 2461define <2 x i64> @load_splat_v2i64_with_unfolded_offset(ptr %p) { 2462; CHECK-LABEL: load_splat_v2i64_with_unfolded_offset: 2463; CHECK: .functype load_splat_v2i64_with_unfolded_offset (i32) -> (v128) 2464; CHECK-NEXT: # %bb.0: 2465; CHECK-NEXT: local.get 0 2466; CHECK-NEXT: i32.const 16 2467; CHECK-NEXT: i32.add 2468; CHECK-NEXT: v128.load64_splat 0 2469; CHECK-NEXT: # fallthrough-return 2470 %q = ptrtoint ptr %p to i32 2471 %r = add nsw i32 %q, 16 2472 %s = inttoptr i32 %r to ptr 2473 %e = load i64, ptr %s 2474 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2475 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2476 ret <2 x i64> %v2 2477} 2478 2479define <2 x i64> @load_sext_v2i64_with_unfolded_offset(ptr %p) { 2480; CHECK-LABEL: load_sext_v2i64_with_unfolded_offset: 2481; CHECK: .functype load_sext_v2i64_with_unfolded_offset (i32) -> (v128) 2482; CHECK-NEXT: # %bb.0: 2483; CHECK-NEXT: local.get 0 2484; CHECK-NEXT: i32.const 16 2485; CHECK-NEXT: i32.add 2486; CHECK-NEXT: i64x2.load32x2_s 0 2487; CHECK-NEXT: # fallthrough-return 2488 %q = ptrtoint ptr %p to i32 2489 %r = add nsw i32 %q, 16 2490 %s = inttoptr i32 %r to ptr 2491 %v = load <2 x i32>, ptr %s 2492 %v2 = sext <2 x i32> %v to <2 x i64> 2493 ret <2 x i64> %v2 2494} 2495 2496define <2 x i64> @load_zext_v2i64_with_unfolded_offset(ptr %p) { 2497; CHECK-LABEL: load_zext_v2i64_with_unfolded_offset: 2498; CHECK: .functype 
load_zext_v2i64_with_unfolded_offset (i32) -> (v128) 2499; CHECK-NEXT: # %bb.0: 2500; CHECK-NEXT: local.get 0 2501; CHECK-NEXT: i32.const 16 2502; CHECK-NEXT: i32.add 2503; CHECK-NEXT: i64x2.load32x2_u 0 2504; CHECK-NEXT: # fallthrough-return 2505 %q = ptrtoint ptr %p to i32 2506 %r = add nsw i32 %q, 16 2507 %s = inttoptr i32 %r to ptr 2508 %v = load <2 x i32>, ptr %s 2509 %v2 = zext <2 x i32> %v to <2 x i64> 2510 ret <2 x i64> %v2 2511} 2512 2513define <2 x i32> @load_ext_v2i64_with_unfolded_offset(ptr %p) { 2514; CHECK-LABEL: load_ext_v2i64_with_unfolded_offset: 2515; CHECK: .functype load_ext_v2i64_with_unfolded_offset (i32) -> (v128) 2516; CHECK-NEXT: # %bb.0: 2517; CHECK-NEXT: local.get 0 2518; CHECK-NEXT: i32.const 16 2519; CHECK-NEXT: i32.add 2520; CHECK-NEXT: v128.load64_zero 0 2521; CHECK-NEXT: # fallthrough-return 2522 %q = ptrtoint ptr %p to i32 2523 %r = add nsw i32 %q, 16 2524 %s = inttoptr i32 %r to ptr 2525 %v = load <2 x i32>, ptr %s 2526 ret <2 x i32> %v 2527} 2528 2529define <2 x i64> @load_v2i64_with_unfolded_gep_offset(ptr %p) { 2530; CHECK-LABEL: load_v2i64_with_unfolded_gep_offset: 2531; CHECK: .functype load_v2i64_with_unfolded_gep_offset (i32) -> (v128) 2532; CHECK-NEXT: # %bb.0: 2533; CHECK-NEXT: local.get 0 2534; CHECK-NEXT: i32.const 16 2535; CHECK-NEXT: i32.add 2536; CHECK-NEXT: v128.load 0 2537; CHECK-NEXT: # fallthrough-return 2538 %s = getelementptr <2 x i64>, ptr %p, i32 1 2539 %v = load <2 x i64>, ptr %s 2540 ret <2 x i64> %v 2541} 2542 2543define <2 x i64> @load_splat_v2i64_with_unfolded_gep_offset(ptr %p) { 2544; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_offset: 2545; CHECK: .functype load_splat_v2i64_with_unfolded_gep_offset (i32) -> (v128) 2546; CHECK-NEXT: # %bb.0: 2547; CHECK-NEXT: local.get 0 2548; CHECK-NEXT: i32.const 8 2549; CHECK-NEXT: i32.add 2550; CHECK-NEXT: v128.load64_splat 0 2551; CHECK-NEXT: # fallthrough-return 2552 %s = getelementptr i64, ptr %p, i32 1 2553 %e = load i64, ptr %s 2554 %v1 = insertelement <2 
x i64> undef, i64 %e, i32 0 2555 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2556 ret <2 x i64> %v2 2557} 2558 2559define <2 x i64> @load_sext_v2i64_with_unfolded_gep_offset(ptr %p) { 2560; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_offset: 2561; CHECK: .functype load_sext_v2i64_with_unfolded_gep_offset (i32) -> (v128) 2562; CHECK-NEXT: # %bb.0: 2563; CHECK-NEXT: local.get 0 2564; CHECK-NEXT: i32.const 8 2565; CHECK-NEXT: i32.add 2566; CHECK-NEXT: i64x2.load32x2_s 0 2567; CHECK-NEXT: # fallthrough-return 2568 %s = getelementptr <2 x i32>, ptr %p, i32 1 2569 %v = load <2 x i32>, ptr %s 2570 %v2 = sext <2 x i32> %v to <2 x i64> 2571 ret <2 x i64> %v2 2572} 2573 2574define <2 x i64> @load_zext_v2i64_with_unfolded_gep_offset(ptr %p) { 2575; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_offset: 2576; CHECK: .functype load_zext_v2i64_with_unfolded_gep_offset (i32) -> (v128) 2577; CHECK-NEXT: # %bb.0: 2578; CHECK-NEXT: local.get 0 2579; CHECK-NEXT: i32.const 8 2580; CHECK-NEXT: i32.add 2581; CHECK-NEXT: i64x2.load32x2_u 0 2582; CHECK-NEXT: # fallthrough-return 2583 %s = getelementptr <2 x i32>, ptr %p, i32 1 2584 %v = load <2 x i32>, ptr %s 2585 %v2 = zext <2 x i32> %v to <2 x i64> 2586 ret <2 x i64> %v2 2587} 2588 2589define <2 x i32> @load_ext_v2i64_with_unfolded_gep_offset(ptr %p) { 2590; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_offset: 2591; CHECK: .functype load_ext_v2i64_with_unfolded_gep_offset (i32) -> (v128) 2592; CHECK-NEXT: # %bb.0: 2593; CHECK-NEXT: local.get 0 2594; CHECK-NEXT: i32.const 8 2595; CHECK-NEXT: i32.add 2596; CHECK-NEXT: v128.load64_zero 0 2597; CHECK-NEXT: # fallthrough-return 2598 %s = getelementptr <2 x i32>, ptr %p, i32 1 2599 %v = load <2 x i32>, ptr %s 2600 ret <2 x i32> %v 2601} 2602 2603define <2 x i64> @load_v2i64_from_numeric_address() { 2604; CHECK-LABEL: load_v2i64_from_numeric_address: 2605; CHECK: .functype load_v2i64_from_numeric_address () -> (v128) 2606; CHECK-NEXT: # %bb.0: 2607; 
CHECK-NEXT: i32.const 0 2608; CHECK-NEXT: v128.load 32 2609; CHECK-NEXT: # fallthrough-return 2610 %s = inttoptr i32 32 to ptr 2611 %v = load <2 x i64>, ptr %s 2612 ret <2 x i64> %v 2613} 2614 2615define <2 x i64> @load_splat_v2i64_from_numeric_address() { 2616; CHECK-LABEL: load_splat_v2i64_from_numeric_address: 2617; CHECK: .functype load_splat_v2i64_from_numeric_address () -> (v128) 2618; CHECK-NEXT: # %bb.0: 2619; CHECK-NEXT: i32.const 0 2620; CHECK-NEXT: v128.load64_splat 32 2621; CHECK-NEXT: # fallthrough-return 2622 %s = inttoptr i32 32 to ptr 2623 %e = load i64, ptr %s 2624 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2625 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2626 ret <2 x i64> %v2 2627} 2628 2629define <2 x i64> @load_sext_v2i64_from_numeric_address() { 2630; CHECK-LABEL: load_sext_v2i64_from_numeric_address: 2631; CHECK: .functype load_sext_v2i64_from_numeric_address () -> (v128) 2632; CHECK-NEXT: # %bb.0: 2633; CHECK-NEXT: i32.const 0 2634; CHECK-NEXT: i64x2.load32x2_s 32 2635; CHECK-NEXT: # fallthrough-return 2636 %s = inttoptr i32 32 to ptr 2637 %v = load <2 x i32>, ptr %s 2638 %v2 = sext <2 x i32> %v to <2 x i64> 2639 ret <2 x i64> %v2 2640} 2641 2642define <2 x i64> @load_zext_v2i64_from_numeric_address() { 2643; CHECK-LABEL: load_zext_v2i64_from_numeric_address: 2644; CHECK: .functype load_zext_v2i64_from_numeric_address () -> (v128) 2645; CHECK-NEXT: # %bb.0: 2646; CHECK-NEXT: i32.const 0 2647; CHECK-NEXT: i64x2.load32x2_u 32 2648; CHECK-NEXT: # fallthrough-return 2649 %s = inttoptr i32 32 to ptr 2650 %v = load <2 x i32>, ptr %s 2651 %v2 = zext <2 x i32> %v to <2 x i64> 2652 ret <2 x i64> %v2 2653} 2654 2655define <2 x i32> @load_ext_v2i64_from_numeric_address() { 2656; CHECK-LABEL: load_ext_v2i64_from_numeric_address: 2657; CHECK: .functype load_ext_v2i64_from_numeric_address () -> (v128) 2658; CHECK-NEXT: # %bb.0: 2659; CHECK-NEXT: i32.const 0 2660; CHECK-NEXT: v128.load64_zero 32 2661; CHECK-NEXT: # 
fallthrough-return 2662 %s = inttoptr i32 32 to ptr 2663 %v = load <2 x i32>, ptr %s 2664 ret <2 x i32> %v 2665} 2666 2667@gv_v2i64 = global <2 x i64> <i64 42, i64 42> 2668define <2 x i64> @load_v2i64_from_global_address() { 2669; CHECK-LABEL: load_v2i64_from_global_address: 2670; CHECK: .functype load_v2i64_from_global_address () -> (v128) 2671; CHECK-NEXT: # %bb.0: 2672; CHECK-NEXT: i32.const 0 2673; CHECK-NEXT: v128.load gv_v2i64 2674; CHECK-NEXT: # fallthrough-return 2675 %v = load <2 x i64>, ptr @gv_v2i64 2676 ret <2 x i64> %v 2677} 2678 2679@gv_i64 = global i64 42 2680define <2 x i64> @load_splat_v2i64_from_global_address() { 2681; CHECK-LABEL: load_splat_v2i64_from_global_address: 2682; CHECK: .functype load_splat_v2i64_from_global_address () -> (v128) 2683; CHECK-NEXT: # %bb.0: 2684; CHECK-NEXT: i32.const 0 2685; CHECK-NEXT: v128.load64_splat gv_i64 2686; CHECK-NEXT: # fallthrough-return 2687 %e = load i64, ptr @gv_i64 2688 %v1 = insertelement <2 x i64> undef, i64 %e, i32 0 2689 %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 2690 ret <2 x i64> %v2 2691} 2692 2693@gv_v2i32 = global <2 x i32> <i32 42, i32 42> 2694define <2 x i64> @load_sext_v2i64_from_global_address() { 2695; CHECK-LABEL: load_sext_v2i64_from_global_address: 2696; CHECK: .functype load_sext_v2i64_from_global_address () -> (v128) 2697; CHECK-NEXT: # %bb.0: 2698; CHECK-NEXT: i32.const 0 2699; CHECK-NEXT: i64x2.load32x2_s gv_v2i32 2700; CHECK-NEXT: # fallthrough-return 2701 %v = load <2 x i32>, ptr @gv_v2i32 2702 %v2 = sext <2 x i32> %v to <2 x i64> 2703 ret <2 x i64> %v2 2704} 2705 2706define <2 x i64> @load_zext_v2i64_from_global_address() { 2707; CHECK-LABEL: load_zext_v2i64_from_global_address: 2708; CHECK: .functype load_zext_v2i64_from_global_address () -> (v128) 2709; CHECK-NEXT: # %bb.0: 2710; CHECK-NEXT: i32.const 0 2711; CHECK-NEXT: i64x2.load32x2_u gv_v2i32 2712; CHECK-NEXT: # fallthrough-return 2713 %v = load <2 x i32>, ptr @gv_v2i32 2714 %v2 = zext <2 
x i32> %v to <2 x i64> 2715 ret <2 x i64> %v2 2716} 2717 2718define <2 x i32> @load_ext_v2i64_from_global_address() { 2719; CHECK-LABEL: load_ext_v2i64_from_global_address: 2720; CHECK: .functype load_ext_v2i64_from_global_address () -> (v128) 2721; CHECK-NEXT: # %bb.0: 2722; CHECK-NEXT: i32.const 0 2723; CHECK-NEXT: v128.load64_zero gv_v2i32 2724; CHECK-NEXT: # fallthrough-return 2725 %v = load <2 x i32>, ptr @gv_v2i32 2726 ret <2 x i32> %v 2727} 2728 2729define void @store_v2i64(<2 x i64> %v, ptr %p) { 2730; CHECK-LABEL: store_v2i64: 2731; CHECK: .functype store_v2i64 (v128, i32) -> () 2732; CHECK-NEXT: # %bb.0: 2733; CHECK-NEXT: local.get 1 2734; CHECK-NEXT: local.get 0 2735; CHECK-NEXT: v128.store 0 2736; CHECK-NEXT: # fallthrough-return 2737 store <2 x i64> %v , ptr %p 2738 ret void 2739} 2740 2741define void @store_v2i64_with_folded_offset(<2 x i64> %v, ptr %p) { 2742; CHECK-LABEL: store_v2i64_with_folded_offset: 2743; CHECK: .functype store_v2i64_with_folded_offset (v128, i32) -> () 2744; CHECK-NEXT: # %bb.0: 2745; CHECK-NEXT: local.get 1 2746; CHECK-NEXT: local.get 0 2747; CHECK-NEXT: v128.store 16 2748; CHECK-NEXT: # fallthrough-return 2749 %q = ptrtoint ptr %p to i32 2750 %r = add nuw i32 %q, 16 2751 %s = inttoptr i32 %r to ptr 2752 store <2 x i64> %v , ptr %s 2753 ret void 2754} 2755 2756define void @store_v2i64_with_folded_gep_offset(<2 x i64> %v, ptr %p) { 2757; CHECK-LABEL: store_v2i64_with_folded_gep_offset: 2758; CHECK: .functype store_v2i64_with_folded_gep_offset (v128, i32) -> () 2759; CHECK-NEXT: # %bb.0: 2760; CHECK-NEXT: local.get 1 2761; CHECK-NEXT: local.get 0 2762; CHECK-NEXT: v128.store 16 2763; CHECK-NEXT: # fallthrough-return 2764 %s = getelementptr inbounds <2 x i64>, ptr %p, i32 1 2765 store <2 x i64> %v , ptr %s 2766 ret void 2767} 2768 2769define void @store_v2i64_with_unfolded_gep_negative_offset(<2 x i64> %v, ptr %p) { 2770; CHECK-LABEL: store_v2i64_with_unfolded_gep_negative_offset: 2771; CHECK: .functype 
store_v2i64_with_unfolded_gep_negative_offset (v128, i32) -> () 2772; CHECK-NEXT: # %bb.0: 2773; CHECK-NEXT: local.get 1 2774; CHECK-NEXT: i32.const -16 2775; CHECK-NEXT: i32.add 2776; CHECK-NEXT: local.get 0 2777; CHECK-NEXT: v128.store 0 2778; CHECK-NEXT: # fallthrough-return 2779 %s = getelementptr inbounds <2 x i64>, ptr %p, i32 -1 2780 store <2 x i64> %v , ptr %s 2781 ret void 2782} 2783 2784define void @store_v2i64_with_unfolded_offset(<2 x i64> %v, ptr %p) { 2785; CHECK-LABEL: store_v2i64_with_unfolded_offset: 2786; CHECK: .functype store_v2i64_with_unfolded_offset (v128, i32) -> () 2787; CHECK-NEXT: # %bb.0: 2788; CHECK-NEXT: local.get 1 2789; CHECK-NEXT: i32.const 16 2790; CHECK-NEXT: i32.add 2791; CHECK-NEXT: local.get 0 2792; CHECK-NEXT: v128.store 0 2793; CHECK-NEXT: # fallthrough-return 2794 %q = ptrtoint ptr %p to i32 2795 %r = add nsw i32 %q, 16 2796 %s = inttoptr i32 %r to ptr 2797 store <2 x i64> %v , ptr %s 2798 ret void 2799} 2800 2801define void @store_v2i64_with_unfolded_gep_offset(<2 x i64> %v, ptr %p) { 2802; CHECK-LABEL: store_v2i64_with_unfolded_gep_offset: 2803; CHECK: .functype store_v2i64_with_unfolded_gep_offset (v128, i32) -> () 2804; CHECK-NEXT: # %bb.0: 2805; CHECK-NEXT: local.get 1 2806; CHECK-NEXT: i32.const 16 2807; CHECK-NEXT: i32.add 2808; CHECK-NEXT: local.get 0 2809; CHECK-NEXT: v128.store 0 2810; CHECK-NEXT: # fallthrough-return 2811 %s = getelementptr <2 x i64>, ptr %p, i32 1 2812 store <2 x i64> %v , ptr %s 2813 ret void 2814} 2815 2816define void @store_v2i64_to_numeric_address(<2 x i64> %v) { 2817; CHECK-LABEL: store_v2i64_to_numeric_address: 2818; CHECK: .functype store_v2i64_to_numeric_address (v128) -> () 2819; CHECK-NEXT: # %bb.0: 2820; CHECK-NEXT: i32.const 0 2821; CHECK-NEXT: local.get 0 2822; CHECK-NEXT: v128.store 32 2823; CHECK-NEXT: # fallthrough-return 2824 %s = inttoptr i32 32 to ptr 2825 store <2 x i64> %v , ptr %s 2826 ret void 2827} 2828 2829define void @store_v2i64_to_global_address(<2 x i64> %v) { 2830; 
CHECK-LABEL: store_v2i64_to_global_address: 2831; CHECK: .functype store_v2i64_to_global_address (v128) -> () 2832; CHECK-NEXT: # %bb.0: 2833; CHECK-NEXT: i32.const 0 2834; CHECK-NEXT: local.get 0 2835; CHECK-NEXT: v128.store gv_v2i64 2836; CHECK-NEXT: # fallthrough-return 2837 store <2 x i64> %v , ptr @gv_v2i64 2838 ret void 2839} 2840 2841; ============================================================================== 2842; 4 x float 2843; ============================================================================== 2844define <4 x float> @load_v4f32(ptr %p) { 2845; CHECK-LABEL: load_v4f32: 2846; CHECK: .functype load_v4f32 (i32) -> (v128) 2847; CHECK-NEXT: # %bb.0: 2848; CHECK-NEXT: local.get 0 2849; CHECK-NEXT: v128.load 0 2850; CHECK-NEXT: # fallthrough-return 2851 %v = load <4 x float>, ptr %p 2852 ret <4 x float> %v 2853} 2854 2855define <4 x float> @load_splat_v4f32(ptr %p) { 2856; CHECK-LABEL: load_splat_v4f32: 2857; CHECK: .functype load_splat_v4f32 (i32) -> (v128) 2858; CHECK-NEXT: # %bb.0: 2859; CHECK-NEXT: local.get 0 2860; CHECK-NEXT: v128.load32_splat 0 2861; CHECK-NEXT: # fallthrough-return 2862 %e = load float, ptr %p 2863 %v1 = insertelement <4 x float> undef, float %e, i32 0 2864 %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer 2865 ret <4 x float> %v2 2866} 2867 2868define <4 x float> @load_v4f32_with_folded_offset(ptr %p) { 2869; CHECK-LABEL: load_v4f32_with_folded_offset: 2870; CHECK: .functype load_v4f32_with_folded_offset (i32) -> (v128) 2871; CHECK-NEXT: # %bb.0: 2872; CHECK-NEXT: local.get 0 2873; CHECK-NEXT: v128.load 16 2874; CHECK-NEXT: # fallthrough-return 2875 %q = ptrtoint ptr %p to i32 2876 %r = add nuw i32 %q, 16 2877 %s = inttoptr i32 %r to ptr 2878 %v = load <4 x float>, ptr %s 2879 ret <4 x float> %v 2880} 2881 2882define <4 x float> @load_splat_v4f32_with_folded_offset(ptr %p) { 2883; CHECK-LABEL: load_splat_v4f32_with_folded_offset: 2884; CHECK: .functype load_splat_v4f32_with_folded_offset 
(i32) -> (v128) 2885; CHECK-NEXT: # %bb.0: 2886; CHECK-NEXT: local.get 0 2887; CHECK-NEXT: v128.load32_splat 16 2888; CHECK-NEXT: # fallthrough-return 2889 %q = ptrtoint ptr %p to i32 2890 %r = add nuw i32 %q, 16 2891 %s = inttoptr i32 %r to ptr 2892 %e = load float, ptr %s 2893 %v1 = insertelement <4 x float> undef, float %e, i32 0 2894 %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer 2895 ret <4 x float> %v2 2896} 2897 2898define <4 x float> @load_v4f32_with_folded_gep_offset(ptr %p) { 2899; CHECK-LABEL: load_v4f32_with_folded_gep_offset: 2900; CHECK: .functype load_v4f32_with_folded_gep_offset (i32) -> (v128) 2901; CHECK-NEXT: # %bb.0: 2902; CHECK-NEXT: local.get 0 2903; CHECK-NEXT: v128.load 16 2904; CHECK-NEXT: # fallthrough-return 2905 %s = getelementptr inbounds <4 x float>, ptr %p, i32 1 2906 %v = load <4 x float>, ptr %s 2907 ret <4 x float> %v 2908} 2909 2910define <4 x float> @load_splat_v4f32_with_folded_gep_offset(ptr %p) { 2911; CHECK-LABEL: load_splat_v4f32_with_folded_gep_offset: 2912; CHECK: .functype load_splat_v4f32_with_folded_gep_offset (i32) -> (v128) 2913; CHECK-NEXT: # %bb.0: 2914; CHECK-NEXT: local.get 0 2915; CHECK-NEXT: v128.load32_splat 4 2916; CHECK-NEXT: # fallthrough-return 2917 %s = getelementptr inbounds float, ptr %p, i32 1 2918 %e = load float, ptr %s 2919 %v1 = insertelement <4 x float> undef, float %e, i32 0 2920 %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer 2921 ret <4 x float> %v2 2922} 2923 2924define <4 x float> @load_v4f32_with_unfolded_gep_negative_offset(ptr %p) { 2925; CHECK-LABEL: load_v4f32_with_unfolded_gep_negative_offset: 2926; CHECK: .functype load_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128) 2927; CHECK-NEXT: # %bb.0: 2928; CHECK-NEXT: local.get 0 2929; CHECK-NEXT: i32.const -16 2930; CHECK-NEXT: i32.add 2931; CHECK-NEXT: v128.load 0 2932; CHECK-NEXT: # fallthrough-return 2933 %s = getelementptr inbounds <4 x float>, ptr %p, i32 -1 
2934 %v = load <4 x float>, ptr %s 2935 ret <4 x float> %v 2936} 2937 2938define <4 x float> @load_splat_v4f32_with_unfolded_gep_negative_offset(ptr %p) { 2939; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_negative_offset: 2940; CHECK: .functype load_splat_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128) 2941; CHECK-NEXT: # %bb.0: 2942; CHECK-NEXT: local.get 0 2943; CHECK-NEXT: i32.const -4 2944; CHECK-NEXT: i32.add 2945; CHECK-NEXT: v128.load32_splat 0 2946; CHECK-NEXT: # fallthrough-return 2947 %s = getelementptr inbounds float, ptr %p, i32 -1 2948 %e = load float, ptr %s 2949 %v1 = insertelement <4 x float> undef, float %e, i32 0 2950 %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer 2951 ret <4 x float> %v2 2952} 2953 2954define <4 x float> @load_v4f32_with_unfolded_offset(ptr %p) { 2955; CHECK-LABEL: load_v4f32_with_unfolded_offset: 2956; CHECK: .functype load_v4f32_with_unfolded_offset (i32) -> (v128) 2957; CHECK-NEXT: # %bb.0: 2958; CHECK-NEXT: local.get 0 2959; CHECK-NEXT: i32.const 16 2960; CHECK-NEXT: i32.add 2961; CHECK-NEXT: v128.load 0 2962; CHECK-NEXT: # fallthrough-return 2963 %q = ptrtoint ptr %p to i32 2964 %r = add nsw i32 %q, 16 2965 %s = inttoptr i32 %r to ptr 2966 %v = load <4 x float>, ptr %s 2967 ret <4 x float> %v 2968} 2969 2970define <4 x float> @load_splat_v4f32_with_unfolded_offset(ptr %p) { 2971; CHECK-LABEL: load_splat_v4f32_with_unfolded_offset: 2972; CHECK: .functype load_splat_v4f32_with_unfolded_offset (i32) -> (v128) 2973; CHECK-NEXT: # %bb.0: 2974; CHECK-NEXT: local.get 0 2975; CHECK-NEXT: i32.const 16 2976; CHECK-NEXT: i32.add 2977; CHECK-NEXT: v128.load32_splat 0 2978; CHECK-NEXT: # fallthrough-return 2979 %q = ptrtoint ptr %p to i32 2980 %r = add nsw i32 %q, 16 2981 %s = inttoptr i32 %r to ptr 2982 %e = load float, ptr %s 2983 %v1 = insertelement <4 x float> undef, float %e, i32 0 2984 %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer 2985 ret <4 x 
float> %v2 2986} 2987 2988define <4 x float> @load_v4f32_with_unfolded_gep_offset(ptr %p) { 2989; CHECK-LABEL: load_v4f32_with_unfolded_gep_offset: 2990; CHECK: .functype load_v4f32_with_unfolded_gep_offset (i32) -> (v128) 2991; CHECK-NEXT: # %bb.0: 2992; CHECK-NEXT: local.get 0 2993; CHECK-NEXT: i32.const 16 2994; CHECK-NEXT: i32.add 2995; CHECK-NEXT: v128.load 0 2996; CHECK-NEXT: # fallthrough-return 2997 %s = getelementptr <4 x float>, ptr %p, i32 1 2998 %v = load <4 x float>, ptr %s 2999 ret <4 x float> %v 3000} 3001 3002define <4 x float> @load_splat_v4f32_with_unfolded_gep_offset(ptr %p) { 3003; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_offset: 3004; CHECK: .functype load_splat_v4f32_with_unfolded_gep_offset (i32) -> (v128) 3005; CHECK-NEXT: # %bb.0: 3006; CHECK-NEXT: local.get 0 3007; CHECK-NEXT: i32.const 4 3008; CHECK-NEXT: i32.add 3009; CHECK-NEXT: v128.load32_splat 0 3010; CHECK-NEXT: # fallthrough-return 3011 %s = getelementptr float, ptr %p, i32 1 3012 %e = load float, ptr %s 3013 %v1 = insertelement <4 x float> undef, float %e, i32 0 3014 %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer 3015 ret <4 x float> %v2 3016} 3017 3018define <4 x float> @load_v4f32_from_numeric_address() { 3019; CHECK-LABEL: load_v4f32_from_numeric_address: 3020; CHECK: .functype load_v4f32_from_numeric_address () -> (v128) 3021; CHECK-NEXT: # %bb.0: 3022; CHECK-NEXT: i32.const 0 3023; CHECK-NEXT: v128.load 32 3024; CHECK-NEXT: # fallthrough-return 3025 %s = inttoptr i32 32 to ptr 3026 %v = load <4 x float>, ptr %s 3027 ret <4 x float> %v 3028} 3029 3030define <4 x float> @load_splat_v4f32_from_numeric_address() { 3031; CHECK-LABEL: load_splat_v4f32_from_numeric_address: 3032; CHECK: .functype load_splat_v4f32_from_numeric_address () -> (v128) 3033; CHECK-NEXT: # %bb.0: 3034; CHECK-NEXT: i32.const 0 3035; CHECK-NEXT: v128.load32_splat 32 3036; CHECK-NEXT: # fallthrough-return 3037 %s = inttoptr i32 32 to ptr 3038 %e = load float, ptr %s 
3039 %v1 = insertelement <4 x float> undef, float %e, i32 0 3040 %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer 3041 ret <4 x float> %v2 3042} 3043 3044@gv_v4f32 = global <4 x float> <float 42., float 42., float 42., float 42.> 3045define <4 x float> @load_v4f32_from_global_address() { 3046; CHECK-LABEL: load_v4f32_from_global_address: 3047; CHECK: .functype load_v4f32_from_global_address () -> (v128) 3048; CHECK-NEXT: # %bb.0: 3049; CHECK-NEXT: i32.const 0 3050; CHECK-NEXT: v128.load gv_v4f32 3051; CHECK-NEXT: # fallthrough-return 3052 %v = load <4 x float>, ptr @gv_v4f32 3053 ret <4 x float> %v 3054} 3055 3056@gv_f32 = global float 42. 3057define <4 x float> @load_splat_v4f32_from_global_address() { 3058; CHECK-LABEL: load_splat_v4f32_from_global_address: 3059; CHECK: .functype load_splat_v4f32_from_global_address () -> (v128) 3060; CHECK-NEXT: # %bb.0: 3061; CHECK-NEXT: i32.const 0 3062; CHECK-NEXT: v128.load32_splat gv_f32 3063; CHECK-NEXT: # fallthrough-return 3064 %e = load float, ptr @gv_f32 3065 %v1 = insertelement <4 x float> undef, float %e, i32 0 3066 %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer 3067 ret <4 x float> %v2 3068} 3069 3070define void @store_v4f32(<4 x float> %v, ptr %p) { 3071; CHECK-LABEL: store_v4f32: 3072; CHECK: .functype store_v4f32 (v128, i32) -> () 3073; CHECK-NEXT: # %bb.0: 3074; CHECK-NEXT: local.get 1 3075; CHECK-NEXT: local.get 0 3076; CHECK-NEXT: v128.store 0 3077; CHECK-NEXT: # fallthrough-return 3078 store <4 x float> %v , ptr %p 3079 ret void 3080} 3081 3082define void @store_v4f32_with_folded_offset(<4 x float> %v, ptr %p) { 3083; CHECK-LABEL: store_v4f32_with_folded_offset: 3084; CHECK: .functype store_v4f32_with_folded_offset (v128, i32) -> () 3085; CHECK-NEXT: # %bb.0: 3086; CHECK-NEXT: local.get 1 3087; CHECK-NEXT: local.get 0 3088; CHECK-NEXT: v128.store 16 3089; CHECK-NEXT: # fallthrough-return 3090 %q = ptrtoint ptr %p to i32 3091 %r = add nuw i32 
%q, 16 3092 %s = inttoptr i32 %r to ptr 3093 store <4 x float> %v , ptr %s 3094 ret void 3095} 3096 3097define void @store_v4f32_with_folded_gep_offset(<4 x float> %v, ptr %p) { 3098; CHECK-LABEL: store_v4f32_with_folded_gep_offset: 3099; CHECK: .functype store_v4f32_with_folded_gep_offset (v128, i32) -> () 3100; CHECK-NEXT: # %bb.0: 3101; CHECK-NEXT: local.get 1 3102; CHECK-NEXT: local.get 0 3103; CHECK-NEXT: v128.store 16 3104; CHECK-NEXT: # fallthrough-return 3105 %s = getelementptr inbounds <4 x float>, ptr %p, i32 1 3106 store <4 x float> %v , ptr %s 3107 ret void 3108} 3109 3110define void @store_v4f32_with_unfolded_gep_negative_offset(<4 x float> %v, ptr %p) { 3111; CHECK-LABEL: store_v4f32_with_unfolded_gep_negative_offset: 3112; CHECK: .functype store_v4f32_with_unfolded_gep_negative_offset (v128, i32) -> () 3113; CHECK-NEXT: # %bb.0: 3114; CHECK-NEXT: local.get 1 3115; CHECK-NEXT: i32.const -16 3116; CHECK-NEXT: i32.add 3117; CHECK-NEXT: local.get 0 3118; CHECK-NEXT: v128.store 0 3119; CHECK-NEXT: # fallthrough-return 3120 %s = getelementptr inbounds <4 x float>, ptr %p, i32 -1 3121 store <4 x float> %v , ptr %s 3122 ret void 3123} 3124 3125define void @store_v4f32_with_unfolded_offset(<4 x float> %v, ptr %p) { 3126; CHECK-LABEL: store_v4f32_with_unfolded_offset: 3127; CHECK: .functype store_v4f32_with_unfolded_offset (v128, i32) -> () 3128; CHECK-NEXT: # %bb.0: 3129; CHECK-NEXT: local.get 1 3130; CHECK-NEXT: i32.const 16 3131; CHECK-NEXT: i32.add 3132; CHECK-NEXT: local.get 0 3133; CHECK-NEXT: v128.store 0 3134; CHECK-NEXT: # fallthrough-return 3135 %q = ptrtoint ptr %p to i32 3136 %r = add nsw i32 %q, 16 3137 %s = inttoptr i32 %r to ptr 3138 store <4 x float> %v , ptr %s 3139 ret void 3140} 3141 3142define void @store_v4f32_with_unfolded_gep_offset(<4 x float> %v, ptr %p) { 3143; CHECK-LABEL: store_v4f32_with_unfolded_gep_offset: 3144; CHECK: .functype store_v4f32_with_unfolded_gep_offset (v128, i32) -> () 3145; CHECK-NEXT: # %bb.0: 3146; CHECK-NEXT: 
local.get 1 3147; CHECK-NEXT: i32.const 16 3148; CHECK-NEXT: i32.add 3149; CHECK-NEXT: local.get 0 3150; CHECK-NEXT: v128.store 0 3151; CHECK-NEXT: # fallthrough-return 3152 %s = getelementptr <4 x float>, ptr %p, i32 1 3153 store <4 x float> %v , ptr %s 3154 ret void 3155} 3156 3157define void @store_v4f32_to_numeric_address(<4 x float> %v) { 3158; CHECK-LABEL: store_v4f32_to_numeric_address: 3159; CHECK: .functype store_v4f32_to_numeric_address (v128) -> () 3160; CHECK-NEXT: # %bb.0: 3161; CHECK-NEXT: i32.const 0 3162; CHECK-NEXT: local.get 0 3163; CHECK-NEXT: v128.store 32 3164; CHECK-NEXT: # fallthrough-return 3165 %s = inttoptr i32 32 to ptr 3166 store <4 x float> %v , ptr %s 3167 ret void 3168} 3169 3170define void @store_v4f32_to_global_address(<4 x float> %v) { 3171; CHECK-LABEL: store_v4f32_to_global_address: 3172; CHECK: .functype store_v4f32_to_global_address (v128) -> () 3173; CHECK-NEXT: # %bb.0: 3174; CHECK-NEXT: i32.const 0 3175; CHECK-NEXT: local.get 0 3176; CHECK-NEXT: v128.store gv_v4f32 3177; CHECK-NEXT: # fallthrough-return 3178 store <4 x float> %v , ptr @gv_v4f32 3179 ret void 3180} 3181 3182; ============================================================================== 3183; 2 x double 3184; ============================================================================== 3185define <2 x double> @load_v2f64(ptr %p) { 3186; CHECK-LABEL: load_v2f64: 3187; CHECK: .functype load_v2f64 (i32) -> (v128) 3188; CHECK-NEXT: # %bb.0: 3189; CHECK-NEXT: local.get 0 3190; CHECK-NEXT: v128.load 0 3191; CHECK-NEXT: # fallthrough-return 3192 %v = load <2 x double>, ptr %p 3193 ret <2 x double> %v 3194} 3195 3196define <2 x double> @load_splat_v2f64(ptr %p) { 3197; CHECK-LABEL: load_splat_v2f64: 3198; CHECK: .functype load_splat_v2f64 (i32) -> (v128) 3199; CHECK-NEXT: # %bb.0: 3200; CHECK-NEXT: local.get 0 3201; CHECK-NEXT: v128.load64_splat 0 3202; CHECK-NEXT: # fallthrough-return 3203 %e = load double, ptr %p 3204 %v1 = insertelement <2 x double> undef, double 
%e, i32 0 3205 %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer 3206 ret <2 x double> %v2 3207} 3208 3209define <2 x double> @load_promote_v2f64(ptr %p) { 3210; CHECK-LABEL: load_promote_v2f64: 3211; CHECK: .functype load_promote_v2f64 (i32) -> (v128) 3212; CHECK-NEXT: # %bb.0: 3213; CHECK-NEXT: local.get 0 3214; CHECK-NEXT: v128.load64_zero 0 3215; CHECK-NEXT: f64x2.promote_low_f32x4 3216; CHECK-NEXT: # fallthrough-return 3217 %e = load <2 x float>, ptr %p 3218 %v = fpext <2 x float> %e to <2 x double> 3219 ret <2 x double> %v 3220} 3221 3222define <2 x double> @load_v2f64_with_folded_offset(ptr %p) { 3223; CHECK-LABEL: load_v2f64_with_folded_offset: 3224; CHECK: .functype load_v2f64_with_folded_offset (i32) -> (v128) 3225; CHECK-NEXT: # %bb.0: 3226; CHECK-NEXT: local.get 0 3227; CHECK-NEXT: v128.load 16 3228; CHECK-NEXT: # fallthrough-return 3229 %q = ptrtoint ptr %p to i32 3230 %r = add nuw i32 %q, 16 3231 %s = inttoptr i32 %r to ptr 3232 %v = load <2 x double>, ptr %s 3233 ret <2 x double> %v 3234} 3235 3236define <2 x double> @load_splat_v2f64_with_folded_offset(ptr %p) { 3237; CHECK-LABEL: load_splat_v2f64_with_folded_offset: 3238; CHECK: .functype load_splat_v2f64_with_folded_offset (i32) -> (v128) 3239; CHECK-NEXT: # %bb.0: 3240; CHECK-NEXT: local.get 0 3241; CHECK-NEXT: v128.load64_splat 16 3242; CHECK-NEXT: # fallthrough-return 3243 %q = ptrtoint ptr %p to i32 3244 %r = add nuw i32 %q, 16 3245 %s = inttoptr i32 %r to ptr 3246 %e = load double, ptr %s 3247 %v1 = insertelement <2 x double> undef, double %e, i32 0 3248 %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer 3249 ret <2 x double> %v2 3250} 3251 3252define <2 x double> @load_promote_v2f64_with_folded_offset(ptr %p) { 3253; CHECK-LABEL: load_promote_v2f64_with_folded_offset: 3254; CHECK: .functype load_promote_v2f64_with_folded_offset (i32) -> (v128) 3255; CHECK-NEXT: # %bb.0: 3256; CHECK-NEXT: local.get 0 3257; CHECK-NEXT: 
i32.const 16 3258; CHECK-NEXT: i32.add 3259; CHECK-NEXT: v128.load64_zero 0 3260; CHECK-NEXT: f64x2.promote_low_f32x4 3261; CHECK-NEXT: # fallthrough-return 3262 %q = ptrtoint ptr %p to i32 3263 %r = add nuw i32 %q, 16 3264 %s = inttoptr i32 %r to ptr 3265 %e = load <2 x float>, ptr %s 3266 %v = fpext <2 x float> %e to <2 x double> 3267 ret <2 x double> %v 3268} 3269 3270define <2 x double> @load_v2f64_with_folded_gep_offset(ptr %p) { 3271; CHECK-LABEL: load_v2f64_with_folded_gep_offset: 3272; CHECK: .functype load_v2f64_with_folded_gep_offset (i32) -> (v128) 3273; CHECK-NEXT: # %bb.0: 3274; CHECK-NEXT: local.get 0 3275; CHECK-NEXT: v128.load 16 3276; CHECK-NEXT: # fallthrough-return 3277 %s = getelementptr inbounds <2 x double>, ptr %p, i32 1 3278 %v = load <2 x double>, ptr %s 3279 ret <2 x double> %v 3280} 3281 3282define <2 x double> @load_splat_v2f64_with_folded_gep_offset(ptr %p) { 3283; CHECK-LABEL: load_splat_v2f64_with_folded_gep_offset: 3284; CHECK: .functype load_splat_v2f64_with_folded_gep_offset (i32) -> (v128) 3285; CHECK-NEXT: # %bb.0: 3286; CHECK-NEXT: local.get 0 3287; CHECK-NEXT: v128.load64_splat 8 3288; CHECK-NEXT: # fallthrough-return 3289 %s = getelementptr inbounds double, ptr %p, i32 1 3290 %e = load double, ptr %s 3291 %v1 = insertelement <2 x double> undef, double %e, i32 0 3292 %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer 3293 ret <2 x double> %v2 3294} 3295 3296define <2 x double> @load_promote_v2f64_with_folded_gep_offset(ptr %p) { 3297; CHECK-LABEL: load_promote_v2f64_with_folded_gep_offset: 3298; CHECK: .functype load_promote_v2f64_with_folded_gep_offset (i32) -> (v128) 3299; CHECK-NEXT: # %bb.0: 3300; CHECK-NEXT: local.get 0 3301; CHECK-NEXT: i32.const 8 3302; CHECK-NEXT: i32.add 3303; CHECK-NEXT: v128.load64_zero 0 3304; CHECK-NEXT: f64x2.promote_low_f32x4 3305; CHECK-NEXT: # fallthrough-return 3306 %s = getelementptr inbounds <2 x float>, ptr %p, i32 1 3307 %e = load <2 x float>, ptr %s 3308 
%v = fpext <2 x float> %e to <2 x double> 3309 ret <2 x double> %v 3310} 3311 3312define <2 x double> @load_v2f64_with_unfolded_gep_negative_offset(ptr %p) { 3313; CHECK-LABEL: load_v2f64_with_unfolded_gep_negative_offset: 3314; CHECK: .functype load_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128) 3315; CHECK-NEXT: # %bb.0: 3316; CHECK-NEXT: local.get 0 3317; CHECK-NEXT: i32.const -16 3318; CHECK-NEXT: i32.add 3319; CHECK-NEXT: v128.load 0 3320; CHECK-NEXT: # fallthrough-return 3321 %s = getelementptr inbounds <2 x double>, ptr %p, i32 -1 3322 %v = load <2 x double>, ptr %s 3323 ret <2 x double> %v 3324} 3325 3326define <2 x double> @load_splat_v2f64_with_unfolded_gep_negative_offset(ptr %p) { 3327; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_negative_offset: 3328; CHECK: .functype load_splat_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128) 3329; CHECK-NEXT: # %bb.0: 3330; CHECK-NEXT: local.get 0 3331; CHECK-NEXT: i32.const -8 3332; CHECK-NEXT: i32.add 3333; CHECK-NEXT: v128.load64_splat 0 3334; CHECK-NEXT: # fallthrough-return 3335 %s = getelementptr inbounds double, ptr %p, i32 -1 3336 %e = load double, ptr %s 3337 %v1 = insertelement <2 x double> undef, double %e, i32 0 3338 %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer 3339 ret <2 x double> %v2 3340} 3341 3342define <2 x double> @load_promote_v2f64_with_unfolded_gep_negative_offset(ptr %p) { 3343; CHECK-LABEL: load_promote_v2f64_with_unfolded_gep_negative_offset: 3344; CHECK: .functype load_promote_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128) 3345; CHECK-NEXT: # %bb.0: 3346; CHECK-NEXT: local.get 0 3347; CHECK-NEXT: i32.const -8 3348; CHECK-NEXT: i32.add 3349; CHECK-NEXT: v128.load64_zero 0 3350; CHECK-NEXT: f64x2.promote_low_f32x4 3351; CHECK-NEXT: # fallthrough-return 3352 %s = getelementptr inbounds <2 x float>, ptr %p, i32 -1 3353 %e = load <2 x float>, ptr %s 3354 %v = fpext <2 x float> %e to <2 x double> 3355 ret <2 x double> %v 3356} 

; With only "nsw" (not "nuw") on the address add, the constant is not
; folded into the load's immediate offset here: an explicit i32.add is
; emitted and the load uses offset 0.
define <2 x double> @load_v2f64_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_offset:
; CHECK:         .functype load_v2f64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <2 x double>, ptr %s
  ret <2 x double> %v
}

; Same unfolded-offset pattern for a scalar load splatted to both lanes.
define <2 x double> @load_splat_v2f64_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_offset:
; CHECK:         .functype load_splat_v2f64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %e = load double, ptr %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

; fpext of a loaded <2 x float> selects v128.load64_zero followed by
; f64x2.promote_low_f32x4.
define <2 x double> @load_promote_v2f64_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_promote_v2f64_with_unfolded_offset:
; CHECK:         .functype load_promote_v2f64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %e = load <2 x float>, ptr %s
  %v = fpext <2 x float> %e to <2 x double>
  ret <2 x double> %v
}

; A gep without "inbounds" is likewise not folded into the offset field.
define <2 x double> @load_v2f64_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_gep_offset:
; CHECK:         .functype load_v2f64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x double>, ptr %p, i32 1
  %v = load <2 x double>, ptr %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v2f64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr double, ptr %p, i32 1
  %e = load double, ptr %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

define <2 x double> @load_promote_v2f64_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_promote_v2f64_with_unfolded_gep_offset:
; CHECK:         .functype load_promote_v2f64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x float>, ptr %p, i32 1
  %e = load <2 x float>, ptr %s
  %v = fpext <2 x float> %e to <2 x double>
  ret <2 x double> %v
}

; A load from a constant address uses base 0 with the address as the
; immediate offset.
define <2 x double> @load_v2f64_from_numeric_address() {
; CHECK-LABEL: load_v2f64_from_numeric_address:
; CHECK:         .functype load_v2f64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  %v = load <2 x double>, ptr %s
  ret <2 x double> %v
}

define <2 x double> @load_splat_v2f64_from_numeric_address() {
; CHECK-LABEL: load_splat_v2f64_from_numeric_address:
; CHECK:         .functype load_splat_v2f64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load64_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  %e = load double, ptr %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

; NOTE: unlike the plain load above, here the constant address is
; materialized with i32.const rather than folded into load64_zero's
; immediate offset.
define <2 x double> @load_promote_v2f64_from_numeric_address() {
; CHECK-LABEL: load_promote_v2f64_from_numeric_address:
; CHECK:         .functype load_promote_v2f64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 32
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  %e = load <2 x float>, ptr %s
  %v = fpext <2 x float> %e to <2 x double>
  ret <2 x double> %v
}

; A global's symbol is used directly as the load's immediate offset.
@gv_v2f64 = global <2 x double> <double 42., double 42.>
define <2 x double> @load_v2f64_from_global_address() {
; CHECK-LABEL: load_v2f64_from_global_address:
; CHECK:         .functype load_v2f64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v2f64
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x double>, ptr @gv_v2f64
  ret <2 x double> %v
}

@gv_f64 = global double 42.
define <2 x double> @load_splat_v2f64_from_global_address() {
; CHECK-LABEL: load_splat_v2f64_from_global_address:
; CHECK:         .functype load_splat_v2f64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load64_splat gv_f64
; CHECK-NEXT:    # fallthrough-return
  %e = load double, ptr @gv_f64
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}

@gv_v2f32 = global <2 x float> <float 42., float 42.>
; NOTE: for the promote pattern the global's address is materialized with
; i32.const instead of being used as load64_zero's immediate offset.
define <2 x double> @load_promote_v2f64_from_global_address() {
; CHECK-LABEL: load_promote_v2f64_from_global_address:
; CHECK:         .functype load_promote_v2f64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const gv_v2f32
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    # fallthrough-return
  %e = load <2 x float>, ptr @gv_v2f32
  %v = fpext <2 x float> %e to <2 x double>
  ret <2 x double> %v
}

; Basic v128 store: the address operand is pushed first, then the value.
define void @store_v2f64(<2 x double> %v, ptr %p) {
; CHECK-LABEL: store_v2f64:
; CHECK:         .functype store_v2f64 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <2 x double> %v , ptr %p
  ret void
}

; A "nuw" constant add folds into the store's immediate offset.
define void @store_v2f64_with_folded_offset(<2 x double> %v, ptr %p) {
; CHECK-LABEL: store_v2f64_with_folded_offset:
; CHECK:         .functype store_v2f64_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  store <2 x double> %v , ptr %s
  ret void
}

; An "inbounds" gep by one whole vector folds into the immediate offset.
define void @store_v2f64_with_folded_gep_offset(<2 x double> %v, ptr %p) {
; CHECK-LABEL: store_v2f64_with_folded_gep_offset:
; CHECK:         .functype store_v2f64_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, ptr %p, i32 1
  store <2 x double> %v , ptr %s
  ret void
}

; The negative offset is not folded; the address is computed with an
; explicit i32.add instead.
define void @store_v2f64_with_unfolded_gep_negative_offset(<2 x double> %v, ptr %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v2f64_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, ptr %p, i32 -1
  store <2 x double> %v , ptr %s
  ret void
}

; With only "nsw" (not "nuw") on the add, the offset is not folded.
define void @store_v2f64_with_unfolded_offset(<2 x double> %v, ptr %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_offset:
; CHECK:         .functype store_v2f64_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  store <2 x double> %v , ptr %s
  ret void
}

; A gep without "inbounds" is likewise not folded.
define void @store_v2f64_with_unfolded_gep_offset(<2 x double> %v, ptr %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_gep_offset:
; CHECK:         .functype store_v2f64_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x double>, ptr %p, i32 1
  store <2 x double> %v , ptr %s
  ret void
}

; A store to a constant address uses base 0 with the address as the
; immediate offset.
define void @store_v2f64_to_numeric_address(<2 x double> %v) {
; CHECK-LABEL: store_v2f64_to_numeric_address:
; CHECK:         .functype store_v2f64_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  store <2 x double> %v , ptr %s
  ret void
}

; A global's symbol is used directly as the store's immediate offset.
define void @store_v2f64_to_global_address(<2 x double> %v) {
; CHECK-LABEL: store_v2f64_to_global_address:
; CHECK:         .functype store_v2f64_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v2f64
; CHECK-NEXT:    # fallthrough-return
  store <2 x double> %v , ptr @gv_v2f64
  ret void
}