; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 -mtriple=wasm32-unknown-unknown | FileCheck %s
; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 -mtriple=wasm64-unknown-unknown | FileCheck %s --check-prefix MEM64

; Test SIMD v128.load{8,16,32,64}_lane instructions.

; TODO: Use the offset field by supporting more patterns. Right now only the
; equivalents of LoadPatNoOffset/StorePatNoOffset are supported.
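
; A note on naming: a "folded" offset is one that can be encoded in the lane
; instruction's offset immediate (e.g. "v128.store8_lane 24, 0" below), while
; an "unfolded" offset has to be materialized with an explicit i32.const and
; i32.add (or their i64 equivalents on wasm64) before the lane instruction.
; As the tests below exercise, folding is only possible when the address
; arithmetic provably cannot wrap: an `add nuw` or an `inbounds` GEP with a
; non-negative offset.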

;===----------------------------------------------------------------------------
; v128.load8_lane / v128.store8_lane
;===----------------------------------------------------------------------------

define <16 x i8> @load_lane_i8_no_offset(ptr %p, <16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_no_offset:
; CHECK:         .functype load_lane_i8_no_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i8_no_offset:
; MEM64:         .functype load_lane_i8_no_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load8_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %x = load i8, ptr %p
  %t = insertelement <16 x i8> %v, i8 %x, i32 0
  ret <16 x i8> %t
}
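
; The two immediates on each lane instruction are the static memory offset and
; the lane index, in that order: "v128.load8_lane 0, 0" above loads through the
; pointer on the stack with no added offset and replaces lane 0.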

define <16 x i8> @load_lane_i8_with_folded_offset(ptr %p, <16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_with_folded_offset:
; CHECK:         .functype load_lane_i8_with_folded_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i8_with_folded_offset:
; MEM64:         .functype load_lane_i8_with_folded_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i32.wrap_i64
; MEM64-NEXT:    i32.const 24
; MEM64-NEXT:    i32.add
; MEM64-NEXT:    i64.extend_i32_u
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load8_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = load i8, ptr %s
  %t = insertelement <16 x i8> %v, i8 %x, i32 0
  ret <16 x i8> %t
}
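
; On wasm64, the i32 address arithmetic coming from ptrtoint/inttoptr is done
; in 32 bits (i32.wrap_i64 + i32.add) and the result is zero-extended back to
; a 64-bit address with i64.extend_i32_u before the access.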

define <16 x i8> @load_lane_i8_with_folded_gep_offset(ptr %p, <16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_with_folded_gep_offset:
; CHECK:         .functype load_lane_i8_with_folded_gep_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 6
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i8_with_folded_gep_offset:
; MEM64:         .functype load_lane_i8_with_folded_gep_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i64.const 6
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load8_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i8, ptr %p, i32 6
  %x = load i8, ptr %s
  %t = insertelement <16 x i8> %v, i8 %x, i32 0
  ret <16 x i8> %t
}

define <16 x i8> @load_lane_i8_with_unfolded_gep_negative_offset(ptr %p, <16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_lane_i8_with_unfolded_gep_negative_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -6
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i8_with_unfolded_gep_negative_offset:
; MEM64:         .functype load_lane_i8_with_unfolded_gep_negative_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i64.const -6
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load8_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i8, ptr %p, i32 -6
  %x = load i8, ptr %s
  %t = insertelement <16 x i8> %v, i8 %x, i32 0
  ret <16 x i8> %t
}

define <16 x i8> @load_lane_i8_with_unfolded_offset(ptr %p, <16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_with_unfolded_offset:
; CHECK:         .functype load_lane_i8_with_unfolded_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i8_with_unfolded_offset:
; MEM64:         .functype load_lane_i8_with_unfolded_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i32.wrap_i64
; MEM64-NEXT:    i32.const 24
; MEM64-NEXT:    i32.add
; MEM64-NEXT:    i64.extend_i32_u
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load8_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = load i8, ptr %s
  %t = insertelement <16 x i8> %v, i8 %x, i32 0
  ret <16 x i8> %t
}

define <16 x i8> @load_lane_i8_with_unfolded_gep_offset(ptr %p, <16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_with_unfolded_gep_offset:
; CHECK:         .functype load_lane_i8_with_unfolded_gep_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 6
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i8_with_unfolded_gep_offset:
; MEM64:         .functype load_lane_i8_with_unfolded_gep_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i64.const 6
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load8_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr i8, ptr %p, i32 6
  %x = load i8, ptr %s
  %t = insertelement <16 x i8> %v, i8 %x, i32 0
  ret <16 x i8> %t
}

define <16 x i8> @load_lane_i8_from_numeric_address(<16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_from_numeric_address:
; CHECK:         .functype load_lane_i8_from_numeric_address (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 42
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i8_from_numeric_address:
; MEM64:         .functype load_lane_i8_from_numeric_address (v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    i64.const 42
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.load8_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = inttoptr i32 42 to ptr
  %x = load i8, ptr %s
  %t = insertelement <16 x i8> %v, i8 %x, i32 0
  ret <16 x i8> %t
}

@gv_i8 = global i8 0
define <16 x i8> @load_lane_i8_from_global_address(<16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_from_global_address:
; CHECK:         .functype load_lane_i8_from_global_address (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const gv_i8
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i8_from_global_address:
; MEM64:         .functype load_lane_i8_from_global_address (v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    i64.const gv_i8
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.load8_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %x = load i8, ptr @gv_i8
  %t = insertelement <16 x i8> %v, i8 %x, i32 0
  ret <16 x i8> %t
}

define void @store_lane_i8_no_offset(<16 x i8> %v, ptr %p) {
; CHECK-LABEL: store_lane_i8_no_offset:
; CHECK:         .functype store_lane_i8_no_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i8_no_offset:
; MEM64:         .functype store_lane_i8_no_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store8_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, ptr %p
  ret void
}

define void @store_lane_i8_with_folded_offset(<16 x i8> %v, ptr %p) {
; CHECK-LABEL: store_lane_i8_with_folded_offset:
; CHECK:         .functype store_lane_i8_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store8_lane 24, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i8_with_folded_offset:
; MEM64:         .functype store_lane_i8_with_folded_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    i32.wrap_i64
; MEM64-NEXT:    i32.const 24
; MEM64-NEXT:    i32.add
; MEM64-NEXT:    i64.extend_i32_u
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store8_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, ptr %s
  ret void
}

define void @store_lane_i8_with_folded_gep_offset(<16 x i8> %v, ptr %p) {
; CHECK-LABEL: store_lane_i8_with_folded_gep_offset:
; CHECK:         .functype store_lane_i8_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store8_lane 6, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i8_with_folded_gep_offset:
; MEM64:         .functype store_lane_i8_with_folded_gep_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store8_lane 6, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i8, ptr %p, i32 6
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, ptr %s
  ret void
}

define void @store_lane_i8_with_unfolded_gep_negative_offset(<16 x i8> %v, ptr %p) {
; CHECK-LABEL: store_lane_i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_lane_i8_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -6
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i8_with_unfolded_gep_negative_offset:
; MEM64:         .functype store_lane_i8_with_unfolded_gep_negative_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    i64.const -6
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store8_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i8, ptr %p, i32 -6
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, ptr %s
  ret void
}

define void @store_lane_i8_with_unfolded_offset(<16 x i8> %v, ptr %p) {
; CHECK-LABEL: store_lane_i8_with_unfolded_offset:
; CHECK:         .functype store_lane_i8_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i8_with_unfolded_offset:
; MEM64:         .functype store_lane_i8_with_unfolded_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    i32.wrap_i64
; MEM64-NEXT:    i32.const 24
; MEM64-NEXT:    i32.add
; MEM64-NEXT:    i64.extend_i32_u
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store8_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, ptr %s
  ret void
}

define void @store_lane_i8_with_unfolded_gep_offset(<16 x i8> %v, ptr %p) {
; CHECK-LABEL: store_lane_i8_with_unfolded_gep_offset:
; CHECK:         .functype store_lane_i8_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 6
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i8_with_unfolded_gep_offset:
; MEM64:         .functype store_lane_i8_with_unfolded_gep_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    i64.const 6
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store8_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr i8, ptr %p, i32 6
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, ptr %s
  ret void
}

define void @store_lane_i8_to_numeric_address(<16 x i8> %v) {
; CHECK-LABEL: store_lane_i8_to_numeric_address:
; CHECK:         .functype store_lane_i8_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store8_lane 42, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i8_to_numeric_address:
; MEM64:         .functype store_lane_i8_to_numeric_address (v128) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    i64.const 0
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store8_lane 42, 0
; MEM64-NEXT:    # fallthrough-return
  %s = inttoptr i32 42 to ptr
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, ptr %s
  ret void
}

define void @store_lane_i8_from_global_address(<16 x i8> %v) {
; CHECK-LABEL: store_lane_i8_from_global_address:
; CHECK:         .functype store_lane_i8_from_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store8_lane gv_i8, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i8_from_global_address:
; MEM64:         .functype store_lane_i8_from_global_address (v128) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    i64.const 0
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store8_lane gv_i8, 0
; MEM64-NEXT:    # fallthrough-return
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, ptr @gv_i8
  ret void
}

;===----------------------------------------------------------------------------
; v128.load16_lane / v128.store16_lane
;===----------------------------------------------------------------------------
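
; These tests mirror the i8 ones, but GEP indices are now scaled by the
; 2-byte element size, so "getelementptr i16, ptr %p, i32 6" shows up as a
; 12-byte offset below.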

define <8 x i16> @load_lane_i16_no_offset(ptr %p, <8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_no_offset:
; CHECK:         .functype load_lane_i16_no_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i16_no_offset:
; MEM64:         .functype load_lane_i16_no_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load16_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %x = load i16, ptr %p
  %t = insertelement <8 x i16> %v, i16 %x, i32 0
  ret <8 x i16> %t
}

define <8 x i16> @load_lane_i16_with_folded_offset(ptr %p, <8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_with_folded_offset:
; CHECK:         .functype load_lane_i16_with_folded_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i16_with_folded_offset:
; MEM64:         .functype load_lane_i16_with_folded_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i32.wrap_i64
; MEM64-NEXT:    i32.const 24
; MEM64-NEXT:    i32.add
; MEM64-NEXT:    i64.extend_i32_u
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load16_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = load i16, ptr %s
  %t = insertelement <8 x i16> %v, i16 %x, i32 0
  ret <8 x i16> %t
}

define <8 x i16> @load_lane_i16_with_folded_gep_offset(ptr %p, <8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_with_folded_gep_offset:
; CHECK:         .functype load_lane_i16_with_folded_gep_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 12
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i16_with_folded_gep_offset:
; MEM64:         .functype load_lane_i16_with_folded_gep_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i64.const 12
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load16_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i16, ptr %p, i32 6
  %x = load i16, ptr %s
  %t = insertelement <8 x i16> %v, i16 %x, i32 0
  ret <8 x i16> %t
}

define <8 x i16> @load_lane_i16_with_unfolded_gep_negative_offset(ptr %p, <8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_lane_i16_with_unfolded_gep_negative_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -12
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i16_with_unfolded_gep_negative_offset:
; MEM64:         .functype load_lane_i16_with_unfolded_gep_negative_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i64.const -12
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load16_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i16, ptr %p, i32 -6
  %x = load i16, ptr %s
  %t = insertelement <8 x i16> %v, i16 %x, i32 0
  ret <8 x i16> %t
}

define <8 x i16> @load_lane_i16_with_unfolded_offset(ptr %p, <8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_with_unfolded_offset:
; CHECK:         .functype load_lane_i16_with_unfolded_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i16_with_unfolded_offset:
; MEM64:         .functype load_lane_i16_with_unfolded_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i32.wrap_i64
; MEM64-NEXT:    i32.const 24
; MEM64-NEXT:    i32.add
; MEM64-NEXT:    i64.extend_i32_u
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load16_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = load i16, ptr %s
  %t = insertelement <8 x i16> %v, i16 %x, i32 0
  ret <8 x i16> %t
}

define <8 x i16> @load_lane_i16_with_unfolded_gep_offset(ptr %p, <8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_with_unfolded_gep_offset:
; CHECK:         .functype load_lane_i16_with_unfolded_gep_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 12
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i16_with_unfolded_gep_offset:
; MEM64:         .functype load_lane_i16_with_unfolded_gep_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i64.const 12
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load16_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr i16, ptr %p, i32 6
  %x = load i16, ptr %s
  %t = insertelement <8 x i16> %v, i16 %x, i32 0
  ret <8 x i16> %t
}

define <8 x i16> @load_lane_i16_from_numeric_address(<8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_from_numeric_address:
; CHECK:         .functype load_lane_i16_from_numeric_address (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 42
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i16_from_numeric_address:
; MEM64:         .functype load_lane_i16_from_numeric_address (v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    i64.const 42
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.load16_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = inttoptr i32 42 to ptr
  %x = load i16, ptr %s
  %t = insertelement <8 x i16> %v, i16 %x, i32 0
  ret <8 x i16> %t
}

@gv_i16 = global i16 0
define <8 x i16> @load_lane_i16_from_global_address(<8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_from_global_address:
; CHECK:         .functype load_lane_i16_from_global_address (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const gv_i16
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i16_from_global_address:
; MEM64:         .functype load_lane_i16_from_global_address (v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    i64.const gv_i16
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.load16_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %x = load i16, ptr @gv_i16
  %t = insertelement <8 x i16> %v, i16 %x, i32 0
  ret <8 x i16> %t
}

define void @store_lane_i16_no_offset(<8 x i16> %v, ptr %p) {
; CHECK-LABEL: store_lane_i16_no_offset:
; CHECK:         .functype store_lane_i16_no_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i16_no_offset:
; MEM64:         .functype store_lane_i16_no_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store16_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %x = extractelement <8 x i16> %v, i32 0
  store i16 %x, ptr %p
  ret void
}

define void @store_lane_i16_with_folded_offset(<8 x i16> %v, ptr %p) {
; CHECK-LABEL: store_lane_i16_with_folded_offset:
; CHECK:         .functype store_lane_i16_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store16_lane 24, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i16_with_folded_offset:
; MEM64:         .functype store_lane_i16_with_folded_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    i32.wrap_i64
; MEM64-NEXT:    i32.const 24
; MEM64-NEXT:    i32.add
; MEM64-NEXT:    i64.extend_i32_u
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store16_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = extractelement <8 x i16> %v, i32 0
  store i16 %x, ptr %s
  ret void
}

define void @store_lane_i16_with_folded_gep_offset(<8 x i16> %v, ptr %p) {
; CHECK-LABEL: store_lane_i16_with_folded_gep_offset:
; CHECK:         .functype store_lane_i16_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store16_lane 12, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i16_with_folded_gep_offset:
; MEM64:         .functype store_lane_i16_with_folded_gep_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store16_lane 12, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i16, ptr %p, i32 6
  %x = extractelement <8 x i16> %v, i32 0
  store i16 %x, ptr %s
  ret void
}

define void @store_lane_i16_with_unfolded_gep_negative_offset(<8 x i16> %v, ptr %p) {
; CHECK-LABEL: store_lane_i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_lane_i16_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -12
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i16_with_unfolded_gep_negative_offset:
; MEM64:         .functype store_lane_i16_with_unfolded_gep_negative_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    i64.const -12
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store16_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i16, ptr %p, i32 -6
  %x = extractelement <8 x i16> %v, i32 0
  store i16 %x, ptr %s
  ret void
}

define void @store_lane_i16_with_unfolded_offset(<8 x i16> %v, ptr %p) {
; CHECK-LABEL: store_lane_i16_with_unfolded_offset:
; CHECK:         .functype store_lane_i16_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i16_with_unfolded_offset:
; MEM64:         .functype store_lane_i16_with_unfolded_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    i32.wrap_i64
; MEM64-NEXT:    i32.const 24
; MEM64-NEXT:    i32.add
; MEM64-NEXT:    i64.extend_i32_u
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store16_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = extractelement <8 x i16> %v, i32 0
  store i16 %x, ptr %s
  ret void
}

define void @store_lane_i16_with_unfolded_gep_offset(<8 x i16> %v, ptr %p) {
; CHECK-LABEL: store_lane_i16_with_unfolded_gep_offset:
; CHECK:         .functype store_lane_i16_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 12
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i16_with_unfolded_gep_offset:
; MEM64:         .functype store_lane_i16_with_unfolded_gep_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    i64.const 12
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store16_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr i16, ptr %p, i32 6
  %x = extractelement <8 x i16> %v, i32 0
  store i16 %x, ptr %s
  ret void
}

define void @store_lane_i16_to_numeric_address(<8 x i16> %v) {
; CHECK-LABEL: store_lane_i16_to_numeric_address:
; CHECK:         .functype store_lane_i16_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store16_lane 42, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i16_to_numeric_address:
; MEM64:         .functype store_lane_i16_to_numeric_address (v128) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    i64.const 0
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store16_lane 42, 0
; MEM64-NEXT:    # fallthrough-return
  %s = inttoptr i32 42 to ptr
  %x = extractelement <8 x i16> %v, i32 0
  store i16 %x, ptr %s
  ret void
}

define void @store_lane_i16_from_global_address(<8 x i16> %v) {
; CHECK-LABEL: store_lane_i16_from_global_address:
; CHECK:         .functype store_lane_i16_from_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store16_lane gv_i16, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i16_from_global_address:
; MEM64:         .functype store_lane_i16_from_global_address (v128) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    i64.const 0
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store16_lane gv_i16, 0
; MEM64-NEXT:    # fallthrough-return
  %x = extractelement <8 x i16> %v, i32 0
  store i16 %x, ptr @gv_i16
  ret void
}

;===----------------------------------------------------------------------------
; v128.load32_lane / v128.store32_lane
;===----------------------------------------------------------------------------
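
; Same pattern with 4-byte lanes: GEP index 6 becomes a 24-byte offset.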

define <4 x i32> @load_lane_i32_no_offset(ptr %p, <4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_no_offset:
; CHECK:         .functype load_lane_i32_no_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i32_no_offset:
; MEM64:         .functype load_lane_i32_no_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load32_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %x = load i32, ptr %p
  %t = insertelement <4 x i32> %v, i32 %x, i32 0
  ret <4 x i32> %t
}

define <4 x i32> @load_lane_i32_with_folded_offset(ptr %p, <4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_with_folded_offset:
; CHECK:         .functype load_lane_i32_with_folded_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i32_with_folded_offset:
; MEM64:         .functype load_lane_i32_with_folded_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i32.wrap_i64
; MEM64-NEXT:    i32.const 24
; MEM64-NEXT:    i32.add
; MEM64-NEXT:    i64.extend_i32_u
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load32_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = load i32, ptr %s
  %t = insertelement <4 x i32> %v, i32 %x, i32 0
  ret <4 x i32> %t
}

define <4 x i32> @load_lane_i32_with_folded_gep_offset(ptr %p, <4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_with_folded_gep_offset:
; CHECK:         .functype load_lane_i32_with_folded_gep_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i32_with_folded_gep_offset:
; MEM64:         .functype load_lane_i32_with_folded_gep_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i64.const 24
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load32_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i32, ptr %p, i32 6
  %x = load i32, ptr %s
  %t = insertelement <4 x i32> %v, i32 %x, i32 0
  ret <4 x i32> %t
}

define <4 x i32> @load_lane_i32_with_unfolded_gep_negative_offset(ptr %p, <4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_lane_i32_with_unfolded_gep_negative_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i32_with_unfolded_gep_negative_offset:
; MEM64:         .functype load_lane_i32_with_unfolded_gep_negative_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i64.const -24
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load32_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i32, ptr %p, i32 -6
  %x = load i32, ptr %s
  %t = insertelement <4 x i32> %v, i32 %x, i32 0
  ret <4 x i32> %t
}

define <4 x i32> @load_lane_i32_with_unfolded_offset(ptr %p, <4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_with_unfolded_offset:
; CHECK:         .functype load_lane_i32_with_unfolded_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i32_with_unfolded_offset:
; MEM64:         .functype load_lane_i32_with_unfolded_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i32.wrap_i64
; MEM64-NEXT:    i32.const 24
; MEM64-NEXT:    i32.add
; MEM64-NEXT:    i64.extend_i32_u
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load32_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = load i32, ptr %s
  %t = insertelement <4 x i32> %v, i32 %x, i32 0
  ret <4 x i32> %t
}

define <4 x i32> @load_lane_i32_with_unfolded_gep_offset(ptr %p, <4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_with_unfolded_gep_offset:
; CHECK:         .functype load_lane_i32_with_unfolded_gep_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i32_with_unfolded_gep_offset:
; MEM64:         .functype load_lane_i32_with_unfolded_gep_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i64.const 24
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load32_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr i32, ptr %p, i32 6
  %x = load i32, ptr %s
  %t = insertelement <4 x i32> %v, i32 %x, i32 0
  ret <4 x i32> %t
}

define <4 x i32> @load_lane_i32_from_numeric_address(<4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_from_numeric_address:
; CHECK:         .functype load_lane_i32_from_numeric_address (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 42
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i32_from_numeric_address:
; MEM64:         .functype load_lane_i32_from_numeric_address (v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    i64.const 42
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.load32_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = inttoptr i32 42 to ptr
  %x = load i32, ptr %s
  %t = insertelement <4 x i32> %v, i32 %x, i32 0
  ret <4 x i32> %t
}

@gv_i32 = global i32 0
define <4 x i32> @load_lane_i32_from_global_address(<4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_from_global_address:
; CHECK:         .functype load_lane_i32_from_global_address (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const gv_i32
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i32_from_global_address:
; MEM64:         .functype load_lane_i32_from_global_address (v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    i64.const gv_i32
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.load32_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %x = load i32, ptr @gv_i32
  %t = insertelement <4 x i32> %v, i32 %x, i32 0
  ret <4 x i32> %t
}

define void @store_lane_i32_no_offset(<4 x i32> %v, ptr %p) {
; CHECK-LABEL: store_lane_i32_no_offset:
; CHECK:         .functype store_lane_i32_no_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i32_no_offset:
; MEM64:         .functype store_lane_i32_no_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store32_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %x = extractelement <4 x i32> %v, i32 0
  store i32 %x, ptr %p
  ret void
}

define void @store_lane_i32_with_folded_offset(<4 x i32> %v, ptr %p) {
; CHECK-LABEL: store_lane_i32_with_folded_offset:
; CHECK:         .functype store_lane_i32_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store32_lane 24, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i32_with_folded_offset:
; MEM64:         .functype store_lane_i32_with_folded_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    i32.wrap_i64
; MEM64-NEXT:    i32.const 24
; MEM64-NEXT:    i32.add
; MEM64-NEXT:    i64.extend_i32_u
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store32_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = extractelement <4 x i32> %v, i32 0
  store i32 %x, ptr %s
  ret void
}

define void @store_lane_i32_with_folded_gep_offset(<4 x i32> %v, ptr %p) {
; CHECK-LABEL: store_lane_i32_with_folded_gep_offset:
; CHECK:         .functype store_lane_i32_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store32_lane 24, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i32_with_folded_gep_offset:
; MEM64:         .functype store_lane_i32_with_folded_gep_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store32_lane 24, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i32, ptr %p, i32 6
  %x = extractelement <4 x i32> %v, i32 0
  store i32 %x, ptr %s
  ret void
}

define void @store_lane_i32_with_unfolded_gep_negative_offset(<4 x i32> %v, ptr %p) {
; CHECK-LABEL: store_lane_i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_lane_i32_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i32_with_unfolded_gep_negative_offset:
; MEM64:         .functype store_lane_i32_with_unfolded_gep_negative_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    i64.const -24
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store32_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i32, ptr %p, i32 -6
  %x = extractelement <4 x i32> %v, i32 0
  store i32 %x, ptr %s
  ret void
}

define void @store_lane_i32_with_unfolded_offset(<4 x i32> %v, ptr %p) {
; CHECK-LABEL: store_lane_i32_with_unfolded_offset:
; CHECK:         .functype store_lane_i32_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i32_with_unfolded_offset:
; MEM64:         .functype store_lane_i32_with_unfolded_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    i32.wrap_i64
; MEM64-NEXT:    i32.const 24
; MEM64-NEXT:    i32.add
; MEM64-NEXT:    i64.extend_i32_u
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store32_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = extractelement <4 x i32> %v, i32 0
  store i32 %x, ptr %s
  ret void
}

define void @store_lane_i32_with_unfolded_gep_offset(<4 x i32> %v, ptr %p) {
; CHECK-LABEL: store_lane_i32_with_unfolded_gep_offset:
; CHECK:         .functype store_lane_i32_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i32_with_unfolded_gep_offset:
; MEM64:         .functype store_lane_i32_with_unfolded_gep_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    i64.const 24
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store32_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr i32, ptr %p, i32 6
  %x = extractelement <4 x i32> %v, i32 0
  store i32 %x, ptr %s
  ret void
}

define void @store_lane_i32_to_numeric_address(<4 x i32> %v) {
; CHECK-LABEL: store_lane_i32_to_numeric_address:
; CHECK:         .functype store_lane_i32_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store32_lane 42, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i32_to_numeric_address:
; MEM64:         .functype store_lane_i32_to_numeric_address (v128) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    i64.const 0
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store32_lane 42, 0
; MEM64-NEXT:    # fallthrough-return
  %s = inttoptr i32 42 to ptr
  %x = extractelement <4 x i32> %v, i32 0
  store i32 %x, ptr %s
  ret void
}

define void @store_lane_i32_from_global_address(<4 x i32> %v) {
; CHECK-LABEL: store_lane_i32_from_global_address:
; CHECK:         .functype store_lane_i32_from_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store32_lane gv_i32, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i32_from_global_address:
; MEM64:         .functype store_lane_i32_from_global_address (v128) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    i64.const 0
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store32_lane gv_i32, 0
; MEM64-NEXT:    # fallthrough-return
  %x = extractelement <4 x i32> %v, i32 0
  store i32 %x, ptr @gv_i32
  ret void
}

;===----------------------------------------------------------------------------
; v128.load64_lane / v128.store64_lane
;===----------------------------------------------------------------------------
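
; And with 8-byte lanes: GEP index 6 becomes a 48-byte offset.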

define <2 x i64> @load_lane_i64_no_offset(ptr %p, <2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_no_offset:
; CHECK:         .functype load_lane_i64_no_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i64_no_offset:
; MEM64:         .functype load_lane_i64_no_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load64_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %x = load i64, ptr %p
  %t = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %t
}

define <2 x i64> @load_lane_i64_with_folded_offset(ptr %p, <2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_with_folded_offset:
; CHECK:         .functype load_lane_i64_with_folded_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i64_with_folded_offset:
; MEM64:         .functype load_lane_i64_with_folded_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i32.wrap_i64
; MEM64-NEXT:    i32.const 24
; MEM64-NEXT:    i32.add
; MEM64-NEXT:    i64.extend_i32_u
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load64_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = load i64, ptr %s
  %t = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %t
}

define <2 x i64> @load_lane_i64_with_folded_gep_offset(ptr %p, <2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_with_folded_gep_offset:
; CHECK:         .functype load_lane_i64_with_folded_gep_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 48
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i64_with_folded_gep_offset:
; MEM64:         .functype load_lane_i64_with_folded_gep_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i64.const 48
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load64_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i64, ptr %p, i32 6
  %x = load i64, ptr %s
  %t = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %t
}

define <2 x i64> @load_lane_i64_with_unfolded_gep_negative_offset(ptr %p, <2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_lane_i64_with_unfolded_gep_negative_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -48
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i64_with_unfolded_gep_negative_offset:
; MEM64:         .functype load_lane_i64_with_unfolded_gep_negative_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i64.const -48
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load64_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i64, ptr %p, i32 -6
  %x = load i64, ptr %s
  %t = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %t
}

define <2 x i64> @load_lane_i64_with_unfolded_offset(ptr %p, <2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_with_unfolded_offset:
; CHECK:         .functype load_lane_i64_with_unfolded_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i64_with_unfolded_offset:
; MEM64:         .functype load_lane_i64_with_unfolded_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i32.wrap_i64
; MEM64-NEXT:    i32.const 24
; MEM64-NEXT:    i32.add
; MEM64-NEXT:    i64.extend_i32_u
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load64_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = load i64, ptr %s
  %t = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %t
}

define <2 x i64> @load_lane_i64_with_unfolded_gep_offset(ptr %p, <2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_with_unfolded_gep_offset:
; CHECK:         .functype load_lane_i64_with_unfolded_gep_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 48
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i64_with_unfolded_gep_offset:
; MEM64:         .functype load_lane_i64_with_unfolded_gep_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i64.const 48
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load64_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr i64, ptr %p, i32 6
  %x = load i64, ptr %s
  %t = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %t
}

define <2 x i64> @load_lane_i64_from_numeric_address(<2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_from_numeric_address:
; CHECK:         .functype load_lane_i64_from_numeric_address (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 42
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i64_from_numeric_address:
; MEM64:         .functype load_lane_i64_from_numeric_address (v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    i64.const 42
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.load64_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = inttoptr i32 42 to ptr
  %x = load i64, ptr %s
  %t = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %t
}

@gv_i64 = global i64 0
define <2 x i64> @load_lane_i64_from_global_address(<2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_from_global_address:
; CHECK:         .functype load_lane_i64_from_global_address (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const gv_i64
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i64_from_global_address:
; MEM64:         .functype load_lane_i64_from_global_address (v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    i64.const gv_i64
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.load64_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %x = load i64, ptr @gv_i64
  %t = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %t
}

define void @store_lane_i64_no_offset(<2 x i64> %v, ptr %p) {
; CHECK-LABEL: store_lane_i64_no_offset:
; CHECK:         .functype store_lane_i64_no_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i64_no_offset:
; MEM64:         .functype store_lane_i64_no_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store64_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %x = extractelement <2 x i64> %v, i32 0
  store i64 %x, ptr %p
  ret void
}

define void @store_lane_i64_with_folded_offset(<2 x i64> %v, ptr %p) {
; CHECK-LABEL: store_lane_i64_with_folded_offset:
; CHECK:         .functype store_lane_i64_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 24, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i64_with_folded_offset:
; MEM64:         .functype store_lane_i64_with_folded_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    i32.wrap_i64
; MEM64-NEXT:    i32.const 24
; MEM64-NEXT:    i32.add
; MEM64-NEXT:    i64.extend_i32_u
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store64_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = extractelement <2 x i64> %v, i32 0
  store i64 %x, ptr %s
  ret void
}

define void @store_lane_i64_with_folded_gep_offset(<2 x i64> %v, ptr %p) {
; CHECK-LABEL: store_lane_i64_with_folded_gep_offset:
; CHECK:         .functype store_lane_i64_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 48, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i64_with_folded_gep_offset:
; MEM64:         .functype store_lane_i64_with_folded_gep_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store64_lane 48, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i64, ptr %p, i32 6
  %x = extractelement <2 x i64> %v, i32 0
  store i64 %x, ptr %s
  ret void
}

define void @store_lane_i64_with_unfolded_gep_negative_offset(<2 x i64> %v, ptr %p) {
; CHECK-LABEL: store_lane_i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_lane_i64_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -48
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i64_with_unfolded_gep_negative_offset:
; MEM64:         .functype store_lane_i64_with_unfolded_gep_negative_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    i64.const -48
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store64_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i64, ptr %p, i32 -6
  %x = extractelement <2 x i64> %v, i32 0
  store i64 %x, ptr %s
  ret void
}

define void @store_lane_i64_with_unfolded_offset(<2 x i64> %v, ptr %p) {
; CHECK-LABEL: store_lane_i64_with_unfolded_offset:
; CHECK:         .functype store_lane_i64_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i64_with_unfolded_offset:
; MEM64:         .functype store_lane_i64_with_unfolded_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    i32.wrap_i64
; MEM64-NEXT:    i32.const 24
; MEM64-NEXT:    i32.add
; MEM64-NEXT:    i64.extend_i32_u
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store64_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = extractelement <2 x i64> %v, i32 0
  store i64 %x, ptr %s
  ret void
}

define void @store_lane_i64_with_unfolded_gep_offset(<2 x i64> %v, ptr %p) {
; CHECK-LABEL: store_lane_i64_with_unfolded_gep_offset:
; CHECK:         .functype store_lane_i64_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 48
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i64_with_unfolded_gep_offset:
; MEM64:         .functype store_lane_i64_with_unfolded_gep_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    i64.const 48
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store64_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr i64, ptr %p, i32 6
  %x = extractelement <2 x i64> %v, i32 0
  store i64 %x, ptr %s
  ret void
}

define void @store_lane_i64_to_numeric_address(<2 x i64> %v) {
; CHECK-LABEL: store_lane_i64_to_numeric_address:
; CHECK:         .functype store_lane_i64_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 42, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i64_to_numeric_address:
; MEM64:         .functype store_lane_i64_to_numeric_address (v128) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    i64.const 0
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store64_lane 42, 0
; MEM64-NEXT:    # fallthrough-return
  %s = inttoptr i32 42 to ptr
  %x = extractelement <2 x i64> %v, i32 0
  store i64 %x, ptr %s
  ret void
}

define void @store_lane_i64_from_global_address(<2 x i64> %v) {
; CHECK-LABEL: store_lane_i64_from_global_address:
; CHECK:         .functype store_lane_i64_from_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane gv_i64, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i64_from_global_address:
; MEM64:         .functype store_lane_i64_from_global_address (v128) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    i64.const 0
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store64_lane gv_i64, 0
; MEM64-NEXT:    # fallthrough-return
  %x = extractelement <2 x i64> %v, i32 0
  store i64 %x, ptr @gv_i64
  ret void
}