; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; Codegen tests for extractelement on AArch64. Every function is compiled by
; both run lines above: once without -global-isel (SD-suffixed checks) and once
; with -global-isel (GI-suffixed checks); checks with the bare common prefix
; apply to both. The GlobalISel run merges stderr into stdout, so the fallback
; warnings listed below are matched as part of its output.

; CHECK-GI:       warning: Instruction selection used fallback path for extract_v4i32_vector_insert
; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for extract_v4i32_vector_insert_const
; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for extract_v4i32_vector_extract
; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for extract_v4i32_vector_extract_const

; Extract with an undef index.
define i64 @extract_v2i64_undef_index(<2 x i64> %a, i32 %c) {
; CHECK-SD-LABEL: extract_v2i64_undef_index:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    fmov x0, d0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: extract_v2i64_undef_index:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    str q0, [sp, #-16]!
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    ldr x0, [sp], #16
; CHECK-GI-NEXT:    ret
entry:
  %d = extractelement <2 x i64> %a, i32 undef
  ret i64 %d
}

; Extract from an undef vector.
define i64 @extract_v2i64_undef_vector(<2 x i64> %a, i32 %c) {
; CHECK-LABEL: extract_v2i64_undef_vector:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ret
entry:
  %d = extractelement <2 x i64> undef, i32 %c
  ret i64 %d
}

; Extract at a variable (run-time) index.
define i64 @extract_v2i64_opaque(<2 x i64> %a, i32 %c) {
; CHECK-SD-LABEL: extract_v2i64_opaque:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #16
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    mov x8, sp
; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT:    str q0, [sp]
; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
; CHECK-SD-NEXT:    ldr x0, [x8]
; CHECK-SD-NEXT:    add sp, sp, #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: extract_v2i64_opaque:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    mov w9, w0
; CHECK-GI-NEXT:    mov x8, sp
; CHECK-GI-NEXT:    str q0, [sp]
; CHECK-GI-NEXT:    and x9, x9, #0x1
; CHECK-GI-NEXT:    ldr x0, [x8, x9, lsl #3]
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret
entry:
  %d = extractelement <2 x i64> %a, i32 %c
  ret i64 %d
}

; Constant index 5 on a 2-element vector (out of bounds).
define i64 @extract_v2i64_oob(<2 x i64> %a, i32 %c) {
; CHECK-LABEL: extract_v2i64_oob:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ret
entry:
  %d = extractelement <2 x i64> %a, i32 5
  ret i64 %d
}

; Variable-index extract from a frozen vector.
define i64 @extract_v2i64_freeze(<2 x i64> %a, i32 %c) {
; CHECK-SD-LABEL: extract_v2i64_freeze:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #16
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    mov x8, sp
; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT:    str q0, [sp]
; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
; CHECK-SD-NEXT:    ldr x0, [x8]
; CHECK-SD-NEXT:    add sp, sp, #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: extract_v2i64_freeze:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    mov w9, w0
; CHECK-GI-NEXT:    mov x8, sp
; CHECK-GI-NEXT:    str q0, [sp]
; CHECK-GI-NEXT:    and x9, x9, #0x1
; CHECK-GI-NEXT:    ldr x0, [x8, x9, lsl #3]
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret
entry:
  %fvector = freeze <2 x i64> %a
  %d = extractelement <2 x i64> %fvector, i32 %c
  ret i64 %d
}

; Insert an element, then extract at the same variable index.
define i64 @extract_v2i64_extract_of_insert(<2 x i64> %a, i64 %element, i64 %c) {
; CHECK-LABEL: extract_v2i64_extract_of_insert:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ret
entry:
  %vector = insertelement <2 x i64> %a, i64 %element, i64 %c
  %d = extractelement <2 x i64> %vector, i64 %c
  ret i64 %d
}

; Insert at constant index 0, then extract at constant index 1.
define i64 @extract_v2i64_extract_of_insert_different_const(<2 x i64> %a, i64 %element) {
; CHECK-SD-LABEL: extract_v2i64_extract_of_insert_different_const:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    mov x0, v0.d[1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: extract_v2i64_extract_of_insert_different_const:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov d0, v0.d[1]
; CHECK-GI-NEXT:    fmov x0, d0
; CHECK-GI-NEXT:    ret
entry:
  %vector = insertelement <2 x i64> %a, i64 %element, i64 0
  %d = extractelement <2 x i64> %vector, i64 1
  ret i64 %d
}

; Constant-index extract from a constant vector.
define i64 @extract_v2i64_extract_build_vector_const(<2 x i64> %a, i32 %c) {
; CHECK-LABEL: extract_v2i64_extract_build_vector_const:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov w0, #11 // =0xb
; CHECK-NEXT:    ret
entry:
  %d = extractelement <2 x i64> <i64 42, i64 11>, i32 1
  ret i64 %d
}

; Variable-index extract from a constant vector.
define i64 @extract_v2i64_extract_build_vector_opaque(<2 x i64> %a, i32 %c) {
; CHECK-SD-LABEL: extract_v2i64_extract_build_vector_opaque:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #16
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    adrp x8, .LCPI8_0
; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT:    ldr q0, [x8, :lo12:.LCPI8_0]
; CHECK-SD-NEXT:    mov x8, sp
; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
; CHECK-SD-NEXT:    str q0, [sp]
; CHECK-SD-NEXT:    ldr x0, [x8]
; CHECK-SD-NEXT:    add sp, sp, #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: extract_v2i64_extract_build_vector_opaque:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    adrp x8, .LCPI8_0
; CHECK-GI-NEXT:    mov x9, sp
; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI8_0]
; CHECK-GI-NEXT:    mov w8, w0
; CHECK-GI-NEXT:    and x8, x8, #0x1
; CHECK-GI-NEXT:    str q0, [sp]
; CHECK-GI-NEXT:    ldr x0, [x9, x8, lsl #3]
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret
entry:
  %d = extractelement <2 x i64> <i64 42, i64 11>, i32 %c
  ret i64 %d
}


; Variable-index extract from a zero-extended <2 x i32>.
define i64 @extract_v2i32_zext(<2 x i32> %a, i32 %c) {
; CHECK-SD-LABEL: extract_v2i32_zext:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #16
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #0
; CHECK-SD-NEXT:    mov x8, sp
; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
; CHECK-SD-NEXT:    str q0, [sp]
; CHECK-SD-NEXT:    ldr x0, [x8]
; CHECK-SD-NEXT:    add sp, sp, #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: extract_v2i32_zext:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
; CHECK-GI-NEXT:    mov w9, w0
; CHECK-GI-NEXT:    mov x8, sp
; CHECK-GI-NEXT:    and x9, x9, #0x1
; CHECK-GI-NEXT:    str q0, [sp]
; CHECK-GI-NEXT:    ldr x0, [x8, x9, lsl #3]
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret
entry:
  %zvector = zext <2 x i32> %a to <2 x i64>
  %d = extractelement <2 x i64> %zvector, i32 %c
  ret i64 %d
}

; Variable-index extract from the result of a vector fptosi.
define i64 @extract_v2double_fptosi(<2 x double> %a, i32 %c) {
; CHECK-SD-LABEL: extract_v2double_fptosi:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #16
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
; CHECK-SD-NEXT:    mov x8, sp
; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
; CHECK-SD-NEXT:    str q0, [sp]
; CHECK-SD-NEXT:    ldr x0, [x8]
; CHECK-SD-NEXT:    add sp, sp, #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: extract_v2double_fptosi:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
; CHECK-GI-NEXT:    mov w9, w0
; CHECK-GI-NEXT:    mov x8, sp
; CHECK-GI-NEXT:    and x9, x9, #0x1
; CHECK-GI-NEXT:    str q0, [sp]
; CHECK-GI-NEXT:    ldr x0, [x8, x9, lsl #3]
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret
entry:
  %vector = fptosi <2 x double> %a to <2 x i64>
  %d = extractelement <2 x i64> %vector, i32 %c
  ret i64 %d
}

; Variable-index extract from the result of a vector fneg.
define double @extract_v2double_fneg(<2 x double> %a, i32 %c) {
; CHECK-SD-LABEL: extract_v2double_fneg:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #16
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    fneg v0.2d, v0.2d
; CHECK-SD-NEXT:    mov x8, sp
; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
; CHECK-SD-NEXT:    str q0, [sp]
; CHECK-SD-NEXT:    ldr d0, [x8]
; CHECK-SD-NEXT:    add sp, sp, #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: extract_v2double_fneg:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    fneg v0.2d, v0.2d
; CHECK-GI-NEXT:    mov w9, w0
; CHECK-GI-NEXT:    mov x8, sp
; CHECK-GI-NEXT:    and x9, x9, #0x1
; CHECK-GI-NEXT:    str q0, [sp]
; CHECK-GI-NEXT:    ldr d0, [x8, x9, lsl #3]
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret
entry:
  %vector = fneg <2 x double> %a
  %d = extractelement <2 x double> %vector, i32 %c
  ret double %d
}

; Variable-index extract from an add with a constant vector.
define i32 @extract_v4i32_add(<4 x i32> %a, <4 x i32> %b, i32 %c) {
; CHECK-SD-LABEL: extract_v4i32_add:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #16
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    adrp x8, .LCPI12_0
; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI12_0]
; CHECK-SD-NEXT:    mov x8, sp
; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
; CHECK-SD-NEXT:    add v0.4s, v0.4s, v1.4s
; CHECK-SD-NEXT:    str q0, [sp]
; CHECK-SD-NEXT:    ldr w0, [x8]
; CHECK-SD-NEXT:    add sp, sp, #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: extract_v4i32_add:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    adrp x8, .LCPI12_0
; CHECK-GI-NEXT:    mov x9, sp
; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI12_0]
; CHECK-GI-NEXT:    mov w8, w0
; CHECK-GI-NEXT:    and x8, x8, #0x3
; CHECK-GI-NEXT:    add v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT:    str q0, [sp]
; CHECK-GI-NEXT:    ldr w0, [x9, x8, lsl #2]
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret
entry:
  %vector = add <4 x i32> %a, <i32 42, i32 11, i32 17, i32 6>
  %d = extractelement <4 x i32> %vector, i32 %c
  ret i32 %d
}

; Variable-index extract from llvm.minimum of the two vector arguments.
define float @extract_v4i32_minimum(<4 x float> %a, <4 x float> %b, i32 %c) {
; CHECK-SD-LABEL: extract_v4i32_minimum:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #16
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    fmin v0.4s, v0.4s, v1.4s
; CHECK-SD-NEXT:    mov x8, sp
; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
; CHECK-SD-NEXT:    str q0, [sp]
; CHECK-SD-NEXT:    ldr s0, [x8]
; CHECK-SD-NEXT:    add sp, sp, #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: extract_v4i32_minimum:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    fmin v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT:    mov w8, w0
; CHECK-GI-NEXT:    mov x9, sp
; CHECK-GI-NEXT:    and x8, x8, #0x3
; CHECK-GI-NEXT:    str q0, [sp]
; CHECK-GI-NEXT:    ldr s0, [x9, x8, lsl #2]
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret
entry:
  %vector = call <4 x float> @llvm.minimum.v4float(<4 x float> %a, <4 x float> %b)
  %d = extractelement <4 x float> %vector, i32 %c
  ret float %d
}

; Variable-index extract from llvm.minimum against a constant vector.
define float @extract_v4i32_minimum_build_vector(<4 x float> %a, <4 x float> %b, i32 %c) {
; CHECK-SD-LABEL: extract_v4i32_minimum_build_vector:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #16
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    adrp x8, .LCPI14_0
; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI14_0]
; CHECK-SD-NEXT:    mov x8, sp
; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
; CHECK-SD-NEXT:    fmin v0.4s, v0.4s, v1.4s
; CHECK-SD-NEXT:    str q0, [sp]
; CHECK-SD-NEXT:    ldr s0, [x8]
; CHECK-SD-NEXT:    add sp, sp, #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: extract_v4i32_minimum_build_vector:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    adrp x8, .LCPI14_0
; CHECK-GI-NEXT:    mov x9, sp
; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI14_0]
; CHECK-GI-NEXT:    mov w8, w0
; CHECK-GI-NEXT:    and x8, x8, #0x3
; CHECK-GI-NEXT:    fmin v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT:    str q0, [sp]
; CHECK-GI-NEXT:    ldr s0, [x9, x8, lsl #2]
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret
entry:
  %vector = call <4 x float> @llvm.minimum.v4float(<4 x float> %a, <4 x float> <float 42.0, float 11.0, float 17.0, float 6.0>)
  %d = extractelement <4 x float> %vector, i32 %c
  ret float %d
}

; Same llvm.minimum against a constant vector, extracted at constant index 1.
define float @extract_v4i32_minimum_build_vector_const(<4 x float> %a, <4 x float> %b, i32 %c) {
; CHECK-LABEL: extract_v4i32_minimum_build_vector_const:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    adrp x8, .LCPI15_0
; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI15_0]
; CHECK-NEXT:    fmin v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    mov s0, v0.s[1]
; CHECK-NEXT:    ret
entry:
  %vector = call <4 x float> @llvm.minimum.v4float(<4 x float> %a, <4 x float> <float 42.0, float 11.0, float 17.0, float 6.0>)
  %d = extractelement <4 x float> %vector, i32 1
  ret float %d
}

; Variable-index extract from llvm.copysign with a constant sign vector.
define float @extract_v4i32_copysign_build_vector(<4 x float> %a, <4 x float> %b, i32 %c) {
; CHECK-SD-LABEL: extract_v4i32_copysign_build_vector:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #16
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    adrp x8, .LCPI16_0
; CHECK-SD-NEXT:    mvni v1.4s, #128, lsl #24
; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI16_0]
; CHECK-SD-NEXT:    mov x8, sp
; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
; CHECK-SD-NEXT:    bif v0.16b, v2.16b, v1.16b
; CHECK-SD-NEXT:    str q0, [sp]
; CHECK-SD-NEXT:    ldr s0, [x8]
; CHECK-SD-NEXT:    add sp, sp, #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: extract_v4i32_copysign_build_vector:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    mvni v1.4s, #128, lsl #24
; CHECK-GI-NEXT:    mov w8, w0
; CHECK-GI-NEXT:    mov x9, sp
; CHECK-GI-NEXT:    and x8, x8, #0x3
; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT:    str q0, [sp]
; CHECK-GI-NEXT:    ldr s0, [x9, x8, lsl #2]
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret
entry:
  %vector = call <4 x float> @llvm.copysign.v4float(<4 x float> %a, <4 x float> <float 42.0, float 11.0, float 17.0, float 6.0>)
  %d = extractelement <4 x float> %vector, i32 %c
  ret float %d
}

; Same llvm.copysign with a constant sign vector, extracted at constant index 2.
define float @extract_v4i32_copysign_build_vector_const(<4 x float> %a, <4 x float> %b, i32 %c) {
; CHECK-SD-LABEL: extract_v4i32_copysign_build_vector_const:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    adrp x8, .LCPI17_0
; CHECK-SD-NEXT:    mvni v1.4s, #128, lsl #24
; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI17_0]
; CHECK-SD-NEXT:    bif v0.16b, v2.16b, v1.16b
; CHECK-SD-NEXT:    mov s0, v0.s[2]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: extract_v4i32_copysign_build_vector_const:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mvni v1.4s, #128, lsl #24
; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT:    mov s0, v0.s[2]
; CHECK-GI-NEXT:    ret
entry:
  %vector = call <4 x float> @llvm.copysign.v4float(<4 x float> %a, <4 x float> <float 42.0, float 11.0, float 17.0, float 6.0>)
  %d = extractelement <4 x float> %vector, i32 2
  ret float %d
}


; Variable-index extract from a zero-extended icmp sle against a constant vector.
define i32 @extract_v4i32_icmp(<4 x i32> %a, <4 x i32> %b, i32 %c) {
; CHECK-SD-LABEL: extract_v4i32_icmp:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #16
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    adrp x8, .LCPI18_0
; CHECK-SD-NEXT:    movi v2.4s, #1
; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI18_0]
; CHECK-SD-NEXT:    mov x8, sp
; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
; CHECK-SD-NEXT:    cmge v0.4s, v1.4s, v0.4s
; CHECK-SD-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-SD-NEXT:    str q0, [sp]
; CHECK-SD-NEXT:    ldr w0, [x8]
; CHECK-SD-NEXT:    add sp, sp, #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: extract_v4i32_icmp:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    adrp x8, .LCPI18_0
; CHECK-GI-NEXT:    movi v2.4s, #1
; CHECK-GI-NEXT:    mov x9, sp
; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI18_0]
; CHECK-GI-NEXT:    mov w8, w0
; CHECK-GI-NEXT:    and x8, x8, #0x3
; CHECK-GI-NEXT:    cmge v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-GI-NEXT:    str q0, [sp]
; CHECK-GI-NEXT:    ldr w0, [x9, x8, lsl #2]
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret
entry:
  %vector = icmp sle <4 x i32> %a, <i32 42, i32 11, i32 17, i32 6>
  %zvector = zext <4 x i1> %vector to <4 x i32>
  %d = extractelement <4 x i32> %zvector, i32 %c
  ret i32 %d
}

; Same zero-extended icmp sle, extracted at constant index 2.
define i32 @extract_v4i32_icmp_const(<4 x i32> %a, <4 x i32> %b, i32 %c) {
; CHECK-SD-LABEL: extract_v4i32_icmp_const:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    adrp x8, .LCPI19_0
; CHECK-SD-NEXT:    movi v2.4s, #1
; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI19_0]
; CHECK-SD-NEXT:    cmge v0.4s, v1.4s, v0.4s
; CHECK-SD-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-SD-NEXT:    mov w0, v0.s[2]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: extract_v4i32_icmp_const:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    adrp x8, .LCPI19_0
; CHECK-GI-NEXT:    movi v2.4s, #1
; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI19_0]
; CHECK-GI-NEXT:    cmge v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-GI-NEXT:    mov s0, v0.s[2]
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
entry:
  %vector = icmp sle <4 x i32> %a, <i32 42, i32 11, i32 17, i32 6>
  %zvector = zext <4 x i1> %vector to <4 x i32>
  %d = extractelement <4 x i32> %zvector, i32 2
  ret i32 %d
}

; Variable-index extract from a zero-extended fcmp uno against a splat of 1.0.
define i32 @extract_v4float_fcmp(<4 x float> %a, <4 x float> %b, i32 %c) {
; CHECK-SD-LABEL: extract_v4float_fcmp:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #16
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    movi v1.4s, #1
; CHECK-SD-NEXT:    fcmeq v0.4s, v0.4s, v0.4s
; CHECK-SD-NEXT:    mov x8, sp
; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
; CHECK-SD-NEXT:    bic v0.16b, v1.16b, v0.16b
; CHECK-SD-NEXT:    str q0, [sp]
; CHECK-SD-NEXT:    ldr w0, [x8]
; CHECK-SD-NEXT:    add sp, sp, #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: extract_v4float_fcmp:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    fmov v1.4s, #1.00000000
; CHECK-GI-NEXT:    mov w8, w0
; CHECK-GI-NEXT:    mov x9, sp
; CHECK-GI-NEXT:    and x8, x8, #0x3
; CHECK-GI-NEXT:    fcmge v2.4s, v0.4s, v1.4s
; CHECK-GI-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT:    movi v1.4s, #1
; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v2.16b
; CHECK-GI-NEXT:    bic v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT:    str q0, [sp]
; CHECK-GI-NEXT:    ldr w0, [x9, x8, lsl #2]
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret
entry:
  %vector = fcmp uno <4 x float> %a, <float 1.0, float 1.0, float 1.0, float 1.0>
  %zvector = zext <4 x i1> %vector to <4 x i32>
  %d = extractelement <4 x i32> %zvector, i32 %c
  ret i32 %d
}

; Same zero-extended fcmp uno, extracted at constant index 1.
define i32 @extract_v4float_fcmp_const(<4 x float> %a, <4 x float> %b, i32 %c) {
; CHECK-SD-LABEL: extract_v4float_fcmp_const:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    movi v1.4s, #1
; CHECK-SD-NEXT:    fcmeq v0.4s, v0.4s, v0.4s
; CHECK-SD-NEXT:    bic v0.16b, v1.16b, v0.16b
; CHECK-SD-NEXT:    mov w0, v0.s[1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: extract_v4float_fcmp_const:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    fmov v1.4s, #1.00000000
; CHECK-GI-NEXT:    fcmge v2.4s, v0.4s, v1.4s
; CHECK-GI-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT:    movi v1.4s, #1
; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v2.16b
; CHECK-GI-NEXT:    bic v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT:    mov s0, v0.s[1]
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
entry:
  %vector = fcmp uno <4 x float> %a, <float 1.0, float 1.0, float 1.0, float 1.0>
  %zvector = zext <4 x i1> %vector to <4 x i32>
  %d = extractelement <4 x i32> %zvector, i32 1
  ret i32 %d
}

; Variable-index extract from a vector select between %a and a constant vector.
define i32 @extract_v4i32_select(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %cond) {
; CHECK-SD-LABEL: extract_v4i32_select:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #16
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    ushll v1.4s, v2.4h, #0
; CHECK-SD-NEXT:    adrp x8, .LCPI22_0
; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI22_0]
; CHECK-SD-NEXT:    mov x8, sp
; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
; CHECK-SD-NEXT:    shl v1.4s, v1.4s, #31
; CHECK-SD-NEXT:    cmlt v1.4s, v1.4s, #0
; CHECK-SD-NEXT:    bif v0.16b, v2.16b, v1.16b
; CHECK-SD-NEXT:    str q0, [sp]
; CHECK-SD-NEXT:    ldr w0, [x8]
; CHECK-SD-NEXT:    add sp, sp, #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: extract_v4i32_select:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    ushll v1.4s, v2.4h, #0
; CHECK-GI-NEXT:    adrp x8, .LCPI22_0
; CHECK-GI-NEXT:    mov x9, sp
; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI22_0]
; CHECK-GI-NEXT:    mov w8, w0
; CHECK-GI-NEXT:    and x8, x8, #0x3
; CHECK-GI-NEXT:    shl v1.4s, v1.4s, #31
; CHECK-GI-NEXT:    sshr v1.4s, v1.4s, #31
; CHECK-GI-NEXT:    bif v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT:    str q0, [sp]
; CHECK-GI-NEXT:    ldr w0, [x9, x8, lsl #2]
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret
entry:
  %vector = select <4 x i1> %cond, <4 x i32> %a, <4 x i32> <i32 42, i32 11, i32 17, i32 6>
  %d = extractelement <4 x i32> %vector, i32 %c
  ret i32 %d
}

; Same vector select, extracted at constant index 2.
define i32 @extract_v4i32_select_const(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %cond) {
; CHECK-SD-LABEL: extract_v4i32_select_const:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    ushll v1.4s, v2.4h, #0
; CHECK-SD-NEXT:    movi v2.4s, #17
; CHECK-SD-NEXT:    shl v1.4s, v1.4s, #31
; CHECK-SD-NEXT:    cmlt v1.4s, v1.4s, #0
; CHECK-SD-NEXT:    bif v0.16b, v2.16b, v1.16b
; CHECK-SD-NEXT:    mov w0, v0.s[2]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: extract_v4i32_select_const:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    ushll v1.4s, v2.4h, #0
; CHECK-GI-NEXT:    adrp x8, .LCPI23_0
; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI23_0]
; CHECK-GI-NEXT:    shl v1.4s, v1.4s, #31
; CHECK-GI-NEXT:    sshr v1.4s, v1.4s, #31
; CHECK-GI-NEXT:    bif v0.16b, v2.16b, v1.16b
; CHECK-GI-NEXT:    mov s0, v0.s[2]
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
entry:
  %vector = select <4 x i1> %cond, <4 x i32> %a, <4 x i32> <i32 42, i32 11, i32 17, i32 6>
  %d = extractelement <4 x i32> %vector, i32 2
  ret i32 %d
}

; Variable-index extract at the end of a ceil -> floor -> fabs -> fptosi -> abs
; chain applied to the vector argument.
define i32 @extract_v4i32_abs(<4 x float> %a, i32 %c) {
; CHECK-SD-LABEL: extract_v4i32_abs:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #16
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    frintp v0.4s, v0.4s
; CHECK-SD-NEXT:    mov x8, sp
; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
; CHECK-SD-NEXT:    frintm v0.4s, v0.4s
; CHECK-SD-NEXT:    fabs v0.4s, v0.4s
; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
; CHECK-SD-NEXT:    abs v0.4s, v0.4s
; CHECK-SD-NEXT:    str q0, [sp]
; CHECK-SD-NEXT:    ldr w0, [x8]
; CHECK-SD-NEXT:    add sp, sp, #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: extract_v4i32_abs:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    frintp v0.4s, v0.4s
; CHECK-GI-NEXT:    mov w9, w0
; CHECK-GI-NEXT:    mov x8, sp
; CHECK-GI-NEXT:    and x9, x9, #0x3
; CHECK-GI-NEXT:    frintm v0.4s, v0.4s
; CHECK-GI-NEXT:    fabs v0.4s, v0.4s
; CHECK-GI-NEXT:    fcvtzs v0.4s, v0.4s
; CHECK-GI-NEXT:    abs v0.4s, v0.4s
; CHECK-GI-NEXT:    str q0, [sp]
; CHECK-GI-NEXT:    ldr w0, [x8, x9, lsl #2]
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret
entry:
  %ceil = call <4 x float> @llvm.ceil.v4float(<4 x float> %a)
  %floor = call <4 x float> @llvm.floor.v4float(<4 x float> %ceil)
  %fabs = call <4 x float> @llvm.fabs.v4float(<4 x float> %floor)
  %abs = fptosi <4 x float> %fabs to <4 x i32>
  %vector = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %abs, i1 0)
  %d = extractelement <4 x i32> %vector, i32 %c
  ret i32 %d
}

; Same chain applied to a constant vector, extracted at constant index 1.
define i32 @extract_v4i32_abs_const(<4 x float> %a, i32 %c) {
; CHECK-SD-LABEL: extract_v4i32_abs_const:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    mov w0, #4 // =0x4
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: extract_v4i32_abs_const:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    adrp x8, .LCPI25_0
; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI25_0]
; CHECK-GI-NEXT:    frintp v0.4s, v0.4s
; CHECK-GI-NEXT:    frintm v0.4s, v0.4s
; CHECK-GI-NEXT:    fabs v0.4s, v0.4s
; CHECK-GI-NEXT:    fcvtzs v0.4s, v0.4s
; CHECK-GI-NEXT:    abs v0.4s, v0.4s
; CHECK-GI-NEXT:    mov s0, v0.s[1]
; CHECK-GI-NEXT:    fmov w0, s0
; CHECK-GI-NEXT:    ret
entry:
  %ceil = call <4 x float> @llvm.ceil.v4float(<4 x float> <float 1.0, float 4.0, float 3.0, float 2.0>)
  %floor = call <4 x float> @llvm.floor.v4float(<4 x float> %ceil)
  %fabs = call <4 x float> @llvm.fabs.v4float(<4 x float> %floor)
  %abs = fptosi <4 x float> %fabs to <4 x i32>
  %vector = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %abs, i1 0)
  %d = extractelement <4 x i32> %vector, i32 1
  ret i32 %d
}

; Same chain applied to a constant vector, extracted at a variable index.
define i32 @extract_v4i32_abs_half_const(<4 x float> %a, i32 %c) {
; CHECK-SD-LABEL: extract_v4i32_abs_half_const:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #16
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    adrp x8, .LCPI26_0
; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT:    ldr q0, [x8, :lo12:.LCPI26_0]
; CHECK-SD-NEXT:    mov x8, sp
; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
; CHECK-SD-NEXT:    str q0, [sp]
; CHECK-SD-NEXT:    ldr w0, [x8]
; CHECK-SD-NEXT:    add sp, sp, #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: extract_v4i32_abs_half_const:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    adrp x8, .LCPI26_0
; CHECK-GI-NEXT:    mov x9, sp
; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI26_0]
; CHECK-GI-NEXT:    mov w8, w0
; CHECK-GI-NEXT:    and x8, x8, #0x3
; CHECK-GI-NEXT:    frintp v0.4s, v0.4s
; CHECK-GI-NEXT:    frintm v0.4s, v0.4s
; CHECK-GI-NEXT:    fabs v0.4s, v0.4s
; CHECK-GI-NEXT:    fcvtzs v0.4s, v0.4s
; CHECK-GI-NEXT:    abs v0.4s, v0.4s
; CHECK-GI-NEXT:    str q0, [sp]
; CHECK-GI-NEXT:    ldr w0, [x9, x8, lsl #2]
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret
entry:
  %ceil = call <4 x float> @llvm.ceil.v4float(<4 x float> <float 1.0, float 4.0, float 3.0, float 2.0>)
  %floor = call <4 x float> @llvm.floor.v4float(<4 x float> %ceil)
  %fabs = call <4 x float> @llvm.fabs.v4float(<4 x float> %floor)
  %abs = fptosi <4 x float> %fabs to <4 x i32>
  %vector = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %abs, i1 0)
  %d = extractelement <4 x i32> %vector, i32 %c
  ret i32 %d
}

; Variable-index extract from llvm.vector.insert of %b into %a at offset 0.
define i32 @extract_v4i32_vector_insert(<4 x i32> %a, <2 x i32> %b, i32 %c) {
; CHECK-LABEL: extract_v4i32_vector_insert:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    bfi x8, x0, #2, #2
; CHECK-NEXT:    mov v1.d[1], v0.d[0]
; CHECK-NEXT:    str q1, [sp]
; CHECK-NEXT:    ldr w0, [x8]
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
entry:
  %vector = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> %a, <2 x i32> %b, i64 0)
  %d = extractelement <4 x i32> %vector, i32 %c
  ret i32 %d
}

; Same llvm.vector.insert, extracted at constant index 1.
define i32 @extract_v4i32_vector_insert_const(<4 x i32> %a, <2 x i32> %b, i32 %c) {
; CHECK-LABEL: extract_v4i32_vector_insert_const:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov w0, v1.s[1]
; CHECK-NEXT:    ret
entry:
  %vector = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> %a, <2 x i32> %b, i64 0)
  %d = extractelement <4 x i32> %vector, i32 1
  ret i32 %d
}

; Variable-index extract from the result of an llvm.vector.extract call on %a.
define i32 @extract_v4i32_vector_extract(<4 x i32> %a, <2 x i32> %b, i32 %c) {
; CHECK-LABEL: extract_v4i32_vector_extract:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    str q0, [sp]
; CHECK-NEXT:    bfi x8, x0, #2, #2
; CHECK-NEXT:    ldr w0, [x8]
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
entry:
  %vector = call <4 x i32> @llvm.vector.extract.v2i32.v4i32(<4 x i32> %a, i64 0)
  %d = extractelement <4 x i32> %vector, i32 %c
  ret i32 %d
}

; Same llvm.vector.extract call, extracted at constant index 0.
define i32 @extract_v4i32_vector_extract_const(<4 x i32> %a, <2 x i32> %b, i32 %c) {
; CHECK-LABEL: extract_v4i32_vector_extract_const:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %vector = call <4 x i32> @llvm.vector.extract.v2i32.v4i32(<4 x i32> %a, i64 0)
  %d = extractelement <4 x i32> %vector, i32 0
  ret i32 %d
}

; Variable-index extract from a vector loaded from %arg.
define i32 @extract_v4i32_load(<4 x i32> %a, <2 x i32> %b, i32 %c, ptr %arg) {
; CHECK-SD-LABEL: extract_v4i32_load:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT:    and x8, x0, #0x3
; CHECK-SD-NEXT:    ldr w0, [x1, x8, lsl #2]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: extract_v4i32_load:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov w8, w0
; CHECK-GI-NEXT:    and x8, x8, #0x3
; CHECK-GI-NEXT:    ldr w0, [x1, x8, lsl #2]
; CHECK-GI-NEXT:    ret
entry:
  %vector = load <4 x i32>, ptr %arg
  %d = extractelement <4 x i32> %vector, i32 %c
  ret i32 %d
}

; Constant index 0 extract from a vector loaded from %arg.
define i32 @extract_v4i32_load_const(<4 x i32> %a, <2 x i32> %b, i32 %c, ptr %arg) {
; CHECK-LABEL: extract_v4i32_load_const:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr w0, [x1]
; CHECK-NEXT:    ret
entry:
  %vector = load <4 x i32>, ptr %arg
  %d = extractelement <4 x i32> %vector, i32 0
  ret i32 %d
}

; Variable-index extract from a <4 x i32> bitcast to <2 x double>.
define double @extract_v4i32_bitcast(<4 x i32> %a, i32 %c) {
; CHECK-SD-LABEL: extract_v4i32_bitcast:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #16
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    mov x8, sp
; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT:    str q0, [sp]
; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
; CHECK-SD-NEXT:    ldr d0, [x8]
; CHECK-SD-NEXT:    add sp, sp, #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: extract_v4i32_bitcast:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    mov w9, w0
; CHECK-GI-NEXT:    mov x8, sp
; CHECK-GI-NEXT:    str q0, [sp]
; CHECK-GI-NEXT:    and x9, x9, #0x1
; CHECK-GI-NEXT:    ldr d0, [x8, x9, lsl #3]
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret
entry:
  %vector = bitcast <4 x i32> %a to <2 x double>
  %d = extractelement <2 x double> %vector, i32 %c
  ret double %d
}

; Same bitcast, extracted at constant index 0.
define double @extract_v4i32_bitcast_const(<4 x i32> %a, i32 %c) {
; CHECK-LABEL: extract_v4i32_bitcast_const:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
entry:
  %vector = bitcast <4 x i32> %a to <2 x double>
  %d = extractelement <2 x double> %vector, i32 0
  ret double %d
}

; Variable-index extract from a shufflevector of %a and %b with mask <0, 2, 4, 3>.
define i32 @extract_v4i32_shuffle(<4 x i32> %a, <4 x i32> %b, i32 %c) {
; CHECK-SD-LABEL: extract_v4i32_shuffle:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #16
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    uzp1 v1.4s, v0.4s, v1.4s
; CHECK-SD-NEXT:    mov x8, sp
; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
; CHECK-SD-NEXT:    mov v1.s[3], v0.s[3]
; CHECK-SD-NEXT:    str q1, [sp]
; CHECK-SD-NEXT:    ldr w0, [x8]
; CHECK-SD-NEXT:    add sp, sp, #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: extract_v4i32_shuffle:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    adrp x8, .LCPI35_0
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    mov x9, sp
; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI35_0]
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    mov w8, w0
; CHECK-GI-NEXT:    and x8, x8, #0x3
; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-GI-NEXT:    str q0, [sp]
; CHECK-GI-NEXT:    ldr w0, [x9, x8, lsl #2]
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret
entry:
  %vector = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 3>
  %d = extractelement <4 x i32> %vector, i32 %c
  ret i32 %d
}

; Same shufflevector, extracted at constant index 2.
define i32 @extract_v4i32_shuffle_const(<4 x i32> %a, <4 x i32> %b, i32 %c) {
; CHECK-LABEL: extract_v4i32_shuffle_const:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmov w0, s1
; CHECK-NEXT:    ret
entry:
  %vector = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 3>
  %d = extractelement <4 x i32> %vector, i32 2
  ret i32 %d
}

; Variable-index extract from a constant splat of 11.
define i32 @extract_v4i32_splat(<4 x i32> %a, <2 x i32> %b, i32 %c) {
; CHECK-SD-LABEL: extract_v4i32_splat:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #16
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    movi v0.4s, #11
; CHECK-SD-NEXT:    mov x8, sp
; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
; CHECK-SD-NEXT:    str q0, [sp]
; CHECK-SD-NEXT:    ldr w0, [x8]
; CHECK-SD-NEXT:    add sp, sp, #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: extract_v4i32_splat:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    movi v0.4s, #11
; CHECK-GI-NEXT:    mov w8, w0
; CHECK-GI-NEXT:    mov x9, sp
; CHECK-GI-NEXT:    and x8, x8, #0x3
; CHECK-GI-NEXT:    str q0, [sp]
; CHECK-GI-NEXT:    ldr w0, [x9, x8, lsl #2]
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret
entry:
  %d = extractelement <4 x i32> splat (i32 11), i32 %c
  ret i32 %d
}

; Constant index 0 extract from a constant splat of 11.
define i32 @extract_v4i32_splat_const(<4 x i32> %a, <2 x i32> %b, i32 %c) {
; CHECK-LABEL: extract_v4i32_splat_const:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov w0, #11 // =0xb
; CHECK-NEXT:    ret
entry:
  %d = extractelement <4 x i32> splat (i32 11), i32 0
  ret i32 %d
}

define i32 @extract_v4i32_vp_add(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %mask, i32 %evl) {
; CHECK-SD-LABEL: extract_v4i32_vp_add:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #16
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    add v0.4s, v0.4s, v1.4s
; CHECK-SD-NEXT:    mov x8, sp
; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
; CHECK-SD-NEXT:    str q0, [sp]
; CHECK-SD-NEXT:    ldr w0, [x8]
; CHECK-SD-NEXT:    add sp, sp, #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: extract_v4i32_vp_add:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    add v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT:    mov w8, w0
; CHECK-GI-NEXT:    mov x9, sp
; CHECK-GI-NEXT:    and
x8, x8, #0x3 1014; CHECK-GI-NEXT: str q0, [sp] 1015; CHECK-GI-NEXT: ldr w0, [x9, x8, lsl #2] 1016; CHECK-GI-NEXT: add sp, sp, #16 1017; CHECK-GI-NEXT: ret 1018entry: 1019 %vector = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %mask, i32 %evl) 1020 %d = extractelement <4 x i32> %vector, i32 %c 1021 ret i32 %d 1022} 1023 1024define i32 @extract_v4i32_vp_add_const(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %mask, i32 %evl) { 1025; CHECK-SD-LABEL: extract_v4i32_vp_add_const: 1026; CHECK-SD: // %bb.0: // %entry 1027; CHECK-SD-NEXT: add v0.4s, v0.4s, v1.4s 1028; CHECK-SD-NEXT: mov w0, v0.s[3] 1029; CHECK-SD-NEXT: ret 1030; 1031; CHECK-GI-LABEL: extract_v4i32_vp_add_const: 1032; CHECK-GI: // %bb.0: // %entry 1033; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s 1034; CHECK-GI-NEXT: mov s0, v0.s[3] 1035; CHECK-GI-NEXT: fmov w0, s0 1036; CHECK-GI-NEXT: ret 1037entry: 1038 %vector = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %mask, i32 %evl) 1039 %d = extractelement <4 x i32> %vector, i32 3 1040 ret i32 %d 1041} 1042 1043define i32 @extract_v4i32_phi(i64 %val, i32 %limit, ptr %ptr) { 1044; CHECK-SD-LABEL: extract_v4i32_phi: 1045; CHECK-SD: // %bb.0: // %entry 1046; CHECK-SD-NEXT: dup v1.2s, w0 1047; CHECK-SD-NEXT: adrp x8, .LCPI41_0 1048; CHECK-SD-NEXT: movi v0.2s, #16 1049; CHECK-SD-NEXT: ldr d2, [x8, :lo12:.LCPI41_0] 1050; CHECK-SD-NEXT: add v1.2s, v1.2s, v2.2s 1051; CHECK-SD-NEXT: .LBB41_1: // %loop 1052; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1 1053; CHECK-SD-NEXT: fmov w8, s1 1054; CHECK-SD-NEXT: add v1.2s, v1.2s, v0.2s 1055; CHECK-SD-NEXT: cmp w8, w1 1056; CHECK-SD-NEXT: add w0, w8, #10 1057; CHECK-SD-NEXT: str w0, [x2, w8, sxtw #2] 1058; CHECK-SD-NEXT: b.lo .LBB41_1 1059; CHECK-SD-NEXT: // %bb.2: // %ret 1060; CHECK-SD-NEXT: ret 1061; 1062; CHECK-GI-LABEL: extract_v4i32_phi: 1063; CHECK-GI: // %bb.0: // %entry 1064; CHECK-GI-NEXT: adrp x8, .LCPI41_0 1065; CHECK-GI-NEXT: dup v0.2d, x0 1066; CHECK-GI-NEXT: ldr 
q1, [x8, :lo12:.LCPI41_0] 1067; CHECK-GI-NEXT: add v1.2d, v0.2d, v1.2d 1068; CHECK-GI-NEXT: movi v0.2s, #16 1069; CHECK-GI-NEXT: xtn v1.2s, v1.2d 1070; CHECK-GI-NEXT: .LBB41_1: // %loop 1071; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1 1072; CHECK-GI-NEXT: fmov w8, s1 1073; CHECK-GI-NEXT: fmov w9, s1 1074; CHECK-GI-NEXT: add v1.2s, v1.2s, v0.2s 1075; CHECK-GI-NEXT: cmp w8, w1 1076; CHECK-GI-NEXT: add w0, w9, #10 1077; CHECK-GI-NEXT: str w0, [x2, w8, sxtw #2] 1078; CHECK-GI-NEXT: b.lo .LBB41_1 1079; CHECK-GI-NEXT: // %bb.2: // %ret 1080; CHECK-GI-NEXT: ret 1081entry: 1082 %tempvector = insertelement <2 x i64> undef, i64 %val, i32 0 1083 %vector = shufflevector <2 x i64> %tempvector, <2 x i64> undef, <2 x i32> zeroinitializer 1084 %0 = add <2 x i64> %vector, <i64 1, i64 2> 1085 %1 = trunc <2 x i64> %0 to <2 x i32> 1086 br label %loop 1087 1088loop: 1089 %2 = phi <2 x i32> [ %1, %entry ], [ %inc, %loop ] 1090 %elt = extractelement <2 x i32> %2, i32 0 1091 %end = icmp ult i32 %elt, %limit 1092 %3 = add i32 10, %elt 1093 %4 = sext i32 %elt to i64 1094 %5 = getelementptr i32, ptr %ptr, i64 %4 1095 store i32 %3, ptr %5 1096 %inc = add <2 x i32> %2, <i32 16, i32 16> 1097 br i1 %end, label %loop, label %ret 1098 1099ret: 1100 ret i32 %3 1101} 1102