; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast -aarch64-enable-sink-fold=true | FileCheck %s --check-prefix=CHECK-SDAG
; RUN: llc < %s -global-isel -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast -aarch64-enable-sink-fold=true | FileCheck %s --check-prefix=CHECK-GISEL

; Every function below performs an extractelement whose lane index is the
; run-time argument %idx. The same IR is compiled twice, once without and once
; with -global-isel, and each output is matched against its own set of
; assertions. In all of the expected sequences the source vector is stored to a
; 16-byte stack slot and the selected lane is loaded back through an address
; derived from %idx after masking it to the lane count.
;
; The assertion lines are generated by the script named on the first line;
; regenerate them with that script instead of editing them by hand.

; Variable-index extract from <8 x i8> placed in lane 0 of a <4 x i8> result;
; result lanes 1-3 are copied from source lanes 1-3 with constant indices.
define <4 x i8> @test_varidx_extract_v8s8(<8 x i8> %x, i32 %idx) {
; CHECK-SDAG-LABEL: test_varidx_extract_v8s8:
; CHECK-SDAG:       // %bb.0:
; CHECK-SDAG-NEXT:    sub sp, sp, #16
; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SDAG-NEXT:    add x8, sp, #8
; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SDAG-NEXT:    str d0, [sp, #8]
; CHECK-SDAG-NEXT:    umov w9, v0.b[1]
; CHECK-SDAG-NEXT:    bfxil x8, x0, #0, #3
; CHECK-SDAG-NEXT:    ld1 { v1.b }[0], [x8]
; CHECK-SDAG-NEXT:    umov w8, v0.b[2]
; CHECK-SDAG-NEXT:    mov v1.h[1], w9
; CHECK-SDAG-NEXT:    umov w9, v0.b[3]
; CHECK-SDAG-NEXT:    mov v1.h[2], w8
; CHECK-SDAG-NEXT:    mov v1.h[3], w9
; CHECK-SDAG-NEXT:    fmov d0, d1
; CHECK-SDAG-NEXT:    add sp, sp, #16
; CHECK-SDAG-NEXT:    ret
;
; CHECK-GISEL-LABEL: test_varidx_extract_v8s8:
; CHECK-GISEL:       // %bb.0:
; CHECK-GISEL-NEXT:    sub sp, sp, #16
; CHECK-GISEL-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT:    mov w9, w0
; CHECK-GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GISEL-NEXT:    mov b1, v0.b[1]
; CHECK-GISEL-NEXT:    add x8, sp, #8
; CHECK-GISEL-NEXT:    and x9, x9, #0x7
; CHECK-GISEL-NEXT:    str d0, [sp, #8]
; CHECK-GISEL-NEXT:    mov b2, v0.b[2]
; CHECK-GISEL-NEXT:    lsl x10, x9, #1
; CHECK-GISEL-NEXT:    mov b0, v0.b[3]
; CHECK-GISEL-NEXT:    sub x9, x10, x9
; CHECK-GISEL-NEXT:    ldrb w8, [x8, x9]
; CHECK-GISEL-NEXT:    fmov w9, s1
; CHECK-GISEL-NEXT:    fmov s1, w8
; CHECK-GISEL-NEXT:    fmov w8, s2
; CHECK-GISEL-NEXT:    mov v1.h[1], w9
; CHECK-GISEL-NEXT:    mov v1.h[2], w8
; CHECK-GISEL-NEXT:    fmov w8, s0
; CHECK-GISEL-NEXT:    mov v1.h[3], w8
; CHECK-GISEL-NEXT:    fmov d0, d1
; CHECK-GISEL-NEXT:    add sp, sp, #16
; CHECK-GISEL-NEXT:    ret
  %tmp = extractelement <8 x i8> %x, i32 %idx
  %tmp2 = insertelement <4 x i8> undef, i8 %tmp, i32 0
  %tmp3 = extractelement <8 x i8> %x, i32 1
  %tmp4 = insertelement <4 x i8> %tmp2, i8 %tmp3, i32 1
  %tmp5 = extractelement <8 x i8> %x, i32 2
  %tmp6 = insertelement <4 x i8> %tmp4, i8 %tmp5, i32 2
  %tmp7 = extractelement <8 x i8> %x, i32 3
  %tmp8 = insertelement <4 x i8> %tmp6, i8 %tmp7, i32 3
  ret <4 x i8> %tmp8
}

; Variable-index extract from <16 x i8> placed in lane 0 of an <8 x i8> result;
; result lanes 1-7 are copied from source lanes 1-7 with constant indices.
define <8 x i8> @test_varidx_extract_v16s8(<16 x i8> %x, i32 %idx) {
; CHECK-SDAG-LABEL: test_varidx_extract_v16s8:
; CHECK-SDAG:       // %bb.0:
; CHECK-SDAG-NEXT:    sub sp, sp, #16
; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SDAG-NEXT:    mov x8, sp
; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SDAG-NEXT:    str q0, [sp]
; CHECK-SDAG-NEXT:    bfxil x8, x0, #0, #4
; CHECK-SDAG-NEXT:    ldr b1, [x8]
; CHECK-SDAG-NEXT:    mov v1.b[1], v0.b[1]
; CHECK-SDAG-NEXT:    mov v1.b[2], v0.b[2]
; CHECK-SDAG-NEXT:    mov v1.b[3], v0.b[3]
; CHECK-SDAG-NEXT:    mov v1.b[4], v0.b[4]
; CHECK-SDAG-NEXT:    mov v1.b[5], v0.b[5]
; CHECK-SDAG-NEXT:    mov v1.b[6], v0.b[6]
; CHECK-SDAG-NEXT:    mov v1.b[7], v0.b[7]
; CHECK-SDAG-NEXT:    fmov d0, d1
; CHECK-SDAG-NEXT:    add sp, sp, #16
; CHECK-SDAG-NEXT:    ret
;
; CHECK-GISEL-LABEL: test_varidx_extract_v16s8:
; CHECK-GISEL:       // %bb.0:
; CHECK-GISEL-NEXT:    sub sp, sp, #16
; CHECK-GISEL-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT:    mov w9, w0
; CHECK-GISEL-NEXT:    mov x8, sp
; CHECK-GISEL-NEXT:    str q0, [sp]
; CHECK-GISEL-NEXT:    and x9, x9, #0xf
; CHECK-GISEL-NEXT:    mov b2, v0.b[1]
; CHECK-GISEL-NEXT:    mov b3, v0.b[2]
; CHECK-GISEL-NEXT:    lsl x10, x9, #1
; CHECK-GISEL-NEXT:    sub x9, x10, x9
; CHECK-GISEL-NEXT:    ldr b1, [x8, x9]
; CHECK-GISEL-NEXT:    mov v1.b[0], v1.b[0]
; CHECK-GISEL-NEXT:    mov v1.b[1], v2.b[0]
; CHECK-GISEL-NEXT:    mov b2, v0.b[3]
; CHECK-GISEL-NEXT:    mov v1.b[2], v3.b[0]
; CHECK-GISEL-NEXT:    mov b3, v0.b[4]
; CHECK-GISEL-NEXT:    mov v1.b[3], v2.b[0]
; CHECK-GISEL-NEXT:    mov b2, v0.b[5]
; CHECK-GISEL-NEXT:    mov v1.b[4], v3.b[0]
; CHECK-GISEL-NEXT:    mov b3, v0.b[6]
; CHECK-GISEL-NEXT:    mov b0, v0.b[7]
; CHECK-GISEL-NEXT:    mov v1.b[5], v2.b[0]
; CHECK-GISEL-NEXT:    mov v1.b[6], v3.b[0]
; CHECK-GISEL-NEXT:    mov v1.b[7], v0.b[0]
; CHECK-GISEL-NEXT:    fmov d0, d1
; CHECK-GISEL-NEXT:    add sp, sp, #16
; CHECK-GISEL-NEXT:    ret
  %tmp = extractelement <16 x i8> %x, i32 %idx
  %tmp2 = insertelement <8 x i8> undef, i8 %tmp, i32 0
  %tmp3 = extractelement <16 x i8> %x, i32 1
  %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 1
  %tmp5 = extractelement <16 x i8> %x, i32 2
  %tmp6 = insertelement <8 x i8> %tmp4, i8 %tmp5, i32 2
  %tmp7 = extractelement <16 x i8> %x, i32 3
  %tmp8 = insertelement <8 x i8> %tmp6, i8 %tmp7, i32 3
  %tmp9 = extractelement <16 x i8> %x, i32 4
  %tmp10 = insertelement <8 x i8> %tmp8, i8 %tmp9, i32 4
  %tmp11 = extractelement <16 x i8> %x, i32 5
  %tmp12 = insertelement <8 x i8> %tmp10, i8 %tmp11, i32 5
  %tmp13 = extractelement <16 x i8> %x, i32 6
  %tmp14 = insertelement <8 x i8> %tmp12, i8 %tmp13, i32 6
  %tmp15 = extractelement <16 x i8> %x, i32 7
  %tmp16 = insertelement <8 x i8> %tmp14, i8 %tmp15, i32 7
  ret <8 x i8> %tmp16
}

; Variable-index extract from <2 x i16>, returned directly as a scalar i16.
define i16 @test_varidx_extract_v2s16(<2 x i16> %x, i32 %idx) {
; CHECK-SDAG-LABEL: test_varidx_extract_v2s16:
; CHECK-SDAG:       // %bb.0:
; CHECK-SDAG-NEXT:    sub sp, sp, #16
; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SDAG-NEXT:    add x8, sp, #8
; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SDAG-NEXT:    str d0, [sp, #8]
; CHECK-SDAG-NEXT:    bfi x8, x0, #2, #1
; CHECK-SDAG-NEXT:    ldr w0, [x8]
; CHECK-SDAG-NEXT:    add sp, sp, #16
; CHECK-SDAG-NEXT:    ret
;
; CHECK-GISEL-LABEL: test_varidx_extract_v2s16:
; CHECK-GISEL:       // %bb.0:
; CHECK-GISEL-NEXT:    sub sp, sp, #16
; CHECK-GISEL-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GISEL-NEXT:    mov s1, v0.s[1]
; CHECK-GISEL-NEXT:    mov w9, w0
; CHECK-GISEL-NEXT:    add x8, sp, #12
; CHECK-GISEL-NEXT:    str h0, [sp, #12]
; CHECK-GISEL-NEXT:    and x9, x9, #0x1
; CHECK-GISEL-NEXT:    str h1, [sp, #14]
; CHECK-GISEL-NEXT:    ldrh w0, [x8, x9, lsl #1]
; CHECK-GISEL-NEXT:    add sp, sp, #16
; CHECK-GISEL-NEXT:    ret
  %tmp = extractelement <2 x i16> %x, i32 %idx
  ret i16 %tmp
}

; Variable-index extract from <4 x i16> placed in lane 0 of a <2 x i16> result;
; result lane 1 is copied from source lane 1 with a constant index.
define <2 x i16> @test_varidx_extract_v4s16(<4 x i16> %x, i32 %idx) {
; CHECK-SDAG-LABEL: test_varidx_extract_v4s16:
; CHECK-SDAG:       // %bb.0:
; CHECK-SDAG-NEXT:    sub sp, sp, #16
; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SDAG-NEXT:    add x8, sp, #8
; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SDAG-NEXT:    str d0, [sp, #8]
; CHECK-SDAG-NEXT:    umov w9, v0.h[1]
; CHECK-SDAG-NEXT:    bfi x8, x0, #1, #2
; CHECK-SDAG-NEXT:    ld1 { v0.h }[0], [x8]
; CHECK-SDAG-NEXT:    mov v0.s[1], w9
; CHECK-SDAG-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-SDAG-NEXT:    add sp, sp, #16
; CHECK-SDAG-NEXT:    ret
;
; CHECK-GISEL-LABEL: test_varidx_extract_v4s16:
; CHECK-GISEL:       // %bb.0:
; CHECK-GISEL-NEXT:    sub sp, sp, #16
; CHECK-GISEL-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT:    mov w9, w0
; CHECK-GISEL-NEXT:    mov w8, #2 // =0x2
; CHECK-GISEL-NEXT:    add x10, sp, #8
; CHECK-GISEL-NEXT:    and x9, x9, #0x3
; CHECK-GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GISEL-NEXT:    str d0, [sp, #8]
; CHECK-GISEL-NEXT:    madd x8, x9, x8, x10
; CHECK-GISEL-NEXT:    umov w9, v0.h[1]
; CHECK-GISEL-NEXT:    ld1 { v0.h }[0], [x8]
; CHECK-GISEL-NEXT:    mov v0.s[1], w9
; CHECK-GISEL-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GISEL-NEXT:    add sp, sp, #16
; CHECK-GISEL-NEXT:    ret
  %tmp = extractelement <4 x i16> %x, i32 %idx
  %tmp2 = insertelement <2 x i16> undef, i16 %tmp, i32 0
  %tmp3 = extractelement <4 x i16> %x, i32 1
  %tmp4 = insertelement <2 x i16> %tmp2, i16 %tmp3, i32 1
  ret <2 x i16> %tmp4
}

; Variable-index extract from <8 x i16> placed in lane 0 of a <4 x i16> result;
; result lanes 1-3 are copied from source lanes 1-3 with constant indices.
define <4 x i16> @test_varidx_extract_v8s16(<8 x i16> %x, i32 %idx) {
; CHECK-SDAG-LABEL: test_varidx_extract_v8s16:
; CHECK-SDAG:       // %bb.0:
; CHECK-SDAG-NEXT:    sub sp, sp, #16
; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SDAG-NEXT:    mov x8, sp
; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SDAG-NEXT:    str q0, [sp]
; CHECK-SDAG-NEXT:    bfi x8, x0, #1, #3
; CHECK-SDAG-NEXT:    ldr h1, [x8]
; CHECK-SDAG-NEXT:    mov v1.h[1], v0.h[1]
; CHECK-SDAG-NEXT:    mov v1.h[2], v0.h[2]
; CHECK-SDAG-NEXT:    mov v1.h[3], v0.h[3]
; CHECK-SDAG-NEXT:    fmov d0, d1
; CHECK-SDAG-NEXT:    add sp, sp, #16
; CHECK-SDAG-NEXT:    ret
;
; CHECK-GISEL-LABEL: test_varidx_extract_v8s16:
; CHECK-GISEL:       // %bb.0:
; CHECK-GISEL-NEXT:    sub sp, sp, #16
; CHECK-GISEL-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT:    mov w9, w0
; CHECK-GISEL-NEXT:    mov x8, sp
; CHECK-GISEL-NEXT:    str q0, [sp]
; CHECK-GISEL-NEXT:    and x9, x9, #0x7
; CHECK-GISEL-NEXT:    ldr h1, [x8, x9, lsl #1]
; CHECK-GISEL-NEXT:    mov v1.h[1], v0.h[1]
; CHECK-GISEL-NEXT:    mov v1.h[2], v0.h[2]
; CHECK-GISEL-NEXT:    mov v1.h[3], v0.h[3]
; CHECK-GISEL-NEXT:    fmov d0, d1
; CHECK-GISEL-NEXT:    add sp, sp, #16
; CHECK-GISEL-NEXT:    ret
  %tmp = extractelement <8 x i16> %x, i32 %idx
  %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 0
  %tmp3 = extractelement <8 x i16> %x, i32 1
  %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1
  %tmp5 = extractelement <8 x i16> %x, i32 2
  %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2
  %tmp7 = extractelement <8 x i16> %x, i32 3
  %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3
  ret <4 x i16> %tmp8
}

; Variable-index extract from <2 x i32>, returned directly as a scalar i32.
define i32 @test_varidx_extract_v2s32(<2 x i32> %x, i32 %idx) {
; CHECK-SDAG-LABEL: test_varidx_extract_v2s32:
; CHECK-SDAG:       // %bb.0:
; CHECK-SDAG-NEXT:    sub sp, sp, #16
; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SDAG-NEXT:    add x8, sp, #8
; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SDAG-NEXT:    str d0, [sp, #8]
; CHECK-SDAG-NEXT:    bfi x8, x0, #2, #1
; CHECK-SDAG-NEXT:    ldr w0, [x8]
; CHECK-SDAG-NEXT:    add sp, sp, #16
; CHECK-SDAG-NEXT:    ret
;
; CHECK-GISEL-LABEL: test_varidx_extract_v2s32:
; CHECK-GISEL:       // %bb.0:
; CHECK-GISEL-NEXT:    sub sp, sp, #16
; CHECK-GISEL-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT:    mov w9, w0
; CHECK-GISEL-NEXT:    add x8, sp, #8
; CHECK-GISEL-NEXT:    str d0, [sp, #8]
; CHECK-GISEL-NEXT:    and x9, x9, #0x1
; CHECK-GISEL-NEXT:    ldr w0, [x8, x9, lsl #2]
; CHECK-GISEL-NEXT:    add sp, sp, #16
; CHECK-GISEL-NEXT:    ret
  %tmp = extractelement <2 x i32> %x, i32 %idx
  ret i32 %tmp
}

; Variable-index extract from <4 x i32> placed in lane 0 of a <2 x i32> result;
; result lane 1 is copied from source lane 1 with a constant index.
define <2 x i32> @test_varidx_extract_v4s32(<4 x i32> %x, i32 %idx) {
; CHECK-SDAG-LABEL: test_varidx_extract_v4s32:
; CHECK-SDAG:       // %bb.0:
; CHECK-SDAG-NEXT:    sub sp, sp, #16
; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SDAG-NEXT:    mov x8, sp
; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SDAG-NEXT:    str q0, [sp]
; CHECK-SDAG-NEXT:    bfi x8, x0, #2, #2
; CHECK-SDAG-NEXT:    ldr s1, [x8]
; CHECK-SDAG-NEXT:    mov v1.s[1], v0.s[1]
; CHECK-SDAG-NEXT:    fmov d0, d1
; CHECK-SDAG-NEXT:    add sp, sp, #16
; CHECK-SDAG-NEXT:    ret
;
; CHECK-GISEL-LABEL: test_varidx_extract_v4s32:
; CHECK-GISEL:       // %bb.0:
; CHECK-GISEL-NEXT:    sub sp, sp, #16
; CHECK-GISEL-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT:    mov w9, w0
; CHECK-GISEL-NEXT:    mov x8, sp
; CHECK-GISEL-NEXT:    str q0, [sp]
; CHECK-GISEL-NEXT:    and x9, x9, #0x3
; CHECK-GISEL-NEXT:    ldr s1, [x8, x9, lsl #2]
; CHECK-GISEL-NEXT:    mov v1.s[1], v0.s[1]
; CHECK-GISEL-NEXT:    fmov d0, d1
; CHECK-GISEL-NEXT:    add sp, sp, #16
; CHECK-GISEL-NEXT:    ret
  %tmp = extractelement <4 x i32> %x, i32 %idx
  %tmp2 = insertelement <2 x i32> undef, i32 %tmp, i32 0
  %tmp3 = extractelement <4 x i32> %x, i32 1
  %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
  ret <2 x i32> %tmp4
}

; Variable-index extract from <2 x i64>, returned directly as a scalar i64.
define i64 @test_varidx_extract_v2s64(<2 x i64> %x, i32 %idx) {
; CHECK-SDAG-LABEL: test_varidx_extract_v2s64:
; CHECK-SDAG:       // %bb.0:
; CHECK-SDAG-NEXT:    sub sp, sp, #16
; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SDAG-NEXT:    mov x8, sp
; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SDAG-NEXT:    str q0, [sp]
; CHECK-SDAG-NEXT:    bfi x8, x0, #3, #1
; CHECK-SDAG-NEXT:    ldr x0, [x8]
; CHECK-SDAG-NEXT:    add sp, sp, #16
; CHECK-SDAG-NEXT:    ret
;
; CHECK-GISEL-LABEL: test_varidx_extract_v2s64:
; CHECK-GISEL:       // %bb.0:
; CHECK-GISEL-NEXT:    sub sp, sp, #16
; CHECK-GISEL-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT:    mov w9, w0
; CHECK-GISEL-NEXT:    mov x8, sp
; CHECK-GISEL-NEXT:    str q0, [sp]
; CHECK-GISEL-NEXT:    and x9, x9, #0x1
; CHECK-GISEL-NEXT:    ldr x0, [x8, x9, lsl #3]
; CHECK-GISEL-NEXT:    add sp, sp, #16
; CHECK-GISEL-NEXT:    ret
  %tmp = extractelement <2 x i64> %x, i32 %idx
  ret i64 %tmp
}

; Variable-index extract from a <2 x ptr> vector, returned directly as a ptr.
define ptr @test_varidx_extract_v2p0(<2 x ptr> %x, i32 %idx) {
; CHECK-SDAG-LABEL: test_varidx_extract_v2p0:
; CHECK-SDAG:       // %bb.0:
; CHECK-SDAG-NEXT:    sub sp, sp, #16
; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SDAG-NEXT:    mov x8, sp
; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SDAG-NEXT:    str q0, [sp]
; CHECK-SDAG-NEXT:    bfi x8, x0, #3, #1
; CHECK-SDAG-NEXT:    ldr x0, [x8]
; CHECK-SDAG-NEXT:    add sp, sp, #16
; CHECK-SDAG-NEXT:    ret
;
; CHECK-GISEL-LABEL: test_varidx_extract_v2p0:
; CHECK-GISEL:       // %bb.0:
; CHECK-GISEL-NEXT:    sub sp, sp, #16
; CHECK-GISEL-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT:    mov w9, w0
; CHECK-GISEL-NEXT:    mov x8, sp
; CHECK-GISEL-NEXT:    str q0, [sp]
; CHECK-GISEL-NEXT:    and x9, x9, #0x1
; CHECK-GISEL-NEXT:    ldr x0, [x8, x9, lsl #3]
; CHECK-GISEL-NEXT:    add sp, sp, #16
; CHECK-GISEL-NEXT:    ret
  %tmp = extractelement <2 x ptr> %x, i32 %idx
  ret ptr %tmp
}