; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-eabi -mcpu=generic -aarch64-neon-syntax=apple -mattr="+fullfp16" | FileCheck %s

; Insert %a into lane 0 of a <4 x float> whose remaining lanes are 0.0 and
; store the result to %x.
define void @test0f(ptr nocapture %x, float %a) #0 {
; CHECK-LABEL: test0f:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    movi.2d v1, #0000000000000000
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT:    mov.s v1[0], v0[0]
; CHECK-NEXT:    str q1, [x0]
; CHECK-NEXT:    ret
entry:
  %0 = insertelement <4 x float> <float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %a, i32 0
  store <4 x float> %0, ptr %x, align 16
  ret void
}

; Same as test0f, but the remaining lanes are 1.0 instead of 0.0.
define void @test1f(ptr nocapture %x, float %a) #0 {
; CHECK-LABEL: test1f:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmov.4s v1, #1.00000000
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT:    mov.s v1[0], v0[0]
; CHECK-NEXT:    str q1, [x0]
; CHECK-NEXT:    ret
entry:
  %0 = insertelement <4 x float> <float undef, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, float %a, i32 0
  store <4 x float> %0, ptr %x, align 16
  ret void
}

; Single insert of %a into lane 14 of an all-zero <16 x i8>.
define <16 x i8> @test_insert_v16i8_insert_1(i8 %a) {
; CHECK-LABEL: test_insert_v16i8_insert_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    mov.b v0[14], w0
; CHECK-NEXT:    ret
  %v.0 = insertelement <16 x i8> zeroinitializer, i8 %a, i32 14
  ret <16 x i8> %v.0
}

; Two inserts of %a (lanes 2 and 1) into an all-zero <16 x i8>.
define <16 x i8> @test_insert_v16i8_insert_2(i8 %a) {
; CHECK-LABEL: test_insert_v16i8_insert_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    mov.b v0[1], w0
; CHECK-NEXT:    mov.b v0[2], w0
; CHECK-NEXT:    ret
  %v.0 = insertelement <16 x i8> zeroinitializer, i8 %a, i32 2
  %v.1 = insertelement <16 x i8> %v.0, i8 %a, i32 1
  ret <16 x i8> %v.1
}

; Base vector is undef except for zeros in lanes 5 and 9; every other lane
; (0-4, 6-8, 10-15) is overwritten with %a.
define <16 x i8> @test_insert_v16i8_insert_2_undef_base(i8 %a) {
; CHECK-LABEL: test_insert_v16i8_insert_2_undef_base:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup.16b v0, w0
; CHECK-NEXT:    mov.b v0[5], wzr
; CHECK-NEXT:    mov.b v0[9], wzr
; CHECK-NEXT:    ret
  %v.0 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef> , i8 %a, i32 0
  %v.1 = insertelement <16 x i8> %v.0, i8 %a, i32 1
  %v.2 = insertelement <16 x i8> %v.1, i8 %a, i32 2
  %v.3 = insertelement <16 x i8> %v.2, i8 %a, i32 3
  %v.4 = insertelement <16 x i8> %v.3, i8 %a, i32 4
  %v.6 = insertelement <16 x i8> %v.4, i8 %a, i32 6
  %v.7 = insertelement <16 x i8> %v.6, i8 %a, i32 7
  %v.8 = insertelement <16 x i8> %v.7, i8 %a, i32 8
  ; Chain from %v.8 so that the lane-8 insert above is not a dead value.
  %v.10 = insertelement <16 x i8> %v.8, i8 %a, i32 10
  %v.11 = insertelement <16 x i8> %v.10, i8 %a, i32 11
  %v.12 = insertelement <16 x i8> %v.11, i8 %a, i32 12
  %v.13 = insertelement <16 x i8> %v.12, i8 %a, i32 13
  %v.14 = insertelement <16 x i8> %v.13, i8 %a, i32 14
  %v.15 = insertelement <16 x i8> %v.14, i8 %a, i32 15
  ret <16 x i8> %v.15
}

; Same base vector as above (zeros in lanes 5 and 9), but lanes 2, 7, 12 and
; 15 receive %b while the other overwritten lanes receive %a.
define <16 x i8> @test_insert_v16i8_insert_2_undef_base_different_valeus(i8 %a, i8 %b) {
; CHECK-LABEL: test_insert_v16i8_insert_2_undef_base_different_valeus:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup.16b v0, w0
; CHECK-NEXT:    mov.b v0[2], w1
; CHECK-NEXT:    mov.b v0[5], wzr
; CHECK-NEXT:    mov.b v0[7], w1
; CHECK-NEXT:    mov.b v0[9], wzr
; CHECK-NEXT:    mov.b v0[12], w1
; CHECK-NEXT:    mov.b v0[15], w1
; CHECK-NEXT:    ret
  %v.0 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef> , i8 %a, i32 0
  %v.1 = insertelement <16 x i8> %v.0, i8 %a, i32 1
  %v.2 = insertelement <16 x i8> %v.1, i8 %b, i32 2
  %v.3 = insertelement <16 x i8> %v.2, i8 %a, i32 3
  %v.4 = insertelement <16 x i8> %v.3, i8 %a, i32 4
  %v.6 = insertelement <16 x i8> %v.4, i8 %a, i32 6
  %v.7 = insertelement <16 x i8> %v.6, i8 %b, i32 7
  %v.8 = insertelement <16 x i8> %v.7, i8 %a, i32 8
  ; Chain from %v.8 so that the lane-8 insert above is not a dead value.
  %v.10 = insertelement <16 x i8> %v.8, i8 %a, i32 10
  %v.11 = insertelement <16 x i8> %v.10, i8 %a, i32 11
  %v.12 = insertelement <16 x i8> %v.11, i8 %b, i32 12
  %v.13 = insertelement <16 x i8> %v.12, i8 %a, i32 13
  %v.14 = insertelement <16 x i8> %v.13, i8 %a, i32 14
  %v.15 = insertelement <16 x i8> %v.14, i8 %b, i32 15
  ret <16 x i8> %v.15
}

; <8 x half> base is undef except for 0.0 in lane 7; lanes 0-6 are set to %a.
define <8 x half> @test_insert_v8f16_insert_1(half %a) {
; CHECK-LABEL: test_insert_v8f16_insert_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
; CHECK-NEXT:    dup.8h v0, v0[0]
; CHECK-NEXT:    mov.h v0[7], wzr
; CHECK-NEXT:    ret
  %v.0 = insertelement <8 x half> <half undef, half undef, half undef, half undef, half undef, half undef, half undef, half 0.0>, half %a, i32 0
  %v.1 = insertelement <8 x half> %v.0, half %a, i32 1
  %v.2 = insertelement <8 x half> %v.1, half %a, i32 2
  %v.3 = insertelement <8 x half> %v.2, half %a, i32 3
  %v.4 = insertelement <8 x half> %v.3, half %a, i32 4
  %v.5 = insertelement <8 x half> %v.4, half %a, i32 5
  %v.6 = insertelement <8 x half> %v.5, half %a, i32 6
  ret <8 x half> %v.6
}


; Two inserts of %a (lanes 2 and 1) into an all-zero <8 x half>.
define <8 x half> @test_insert_v8f16_insert_2(half %a) {
; CHECK-LABEL: test_insert_v8f16_insert_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi.2d v1, #0000000000000000
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
; CHECK-NEXT:    mov.h v1[1], v0[0]
; CHECK-NEXT:    mov.h v1[2], v0[0]
; CHECK-NEXT:    mov.16b v0, v1
; CHECK-NEXT:    ret
  %v.0 = insertelement <8 x half> zeroinitializer, half %a, i32 2
  %v.1 = insertelement <8 x half> %v.0, half %a, i32 1
  ret <8 x half> %v.1
}

; <8 x i16> base has zeros in lanes 3 and 7; the six undef lanes are set to %a.
define <8 x i16> @test_insert_v8i16_insert_2(i16 %a) {
; CHECK-LABEL: test_insert_v8i16_insert_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup.8h v0, w0
; CHECK-NEXT:    mov.h v0[3], wzr
; CHECK-NEXT:    mov.h v0[7], wzr
; CHECK-NEXT:    ret
  %v.0 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 0, i16 undef, i16 undef, i16 undef, i16 0>, i16 %a, i32 0
  %v.1 = insertelement <8 x i16> %v.0, i16 %a, i32 1
  %v.2 = insertelement <8 x i16> %v.1, i16 %a, i32 2
  %v.3 = insertelement <8 x i16> %v.2, i16 %a, i32 4
  %v.4 = insertelement <8 x i16> %v.3, i16 %a, i32 5
  %v.5 = insertelement <8 x i16> %v.4, i16 %a, i32 6
  ret <8 x i16> %v.5
}

; <8 x i16> base has zeros in lanes 1, 3 and 7; the five undef lanes are set
; to %a.
define <8 x i16> @test_insert_v8i16_insert_3(i16 %a) {
; CHECK-LABEL: test_insert_v8i16_insert_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup.8h v0, w0
; CHECK-NEXT:    mov.h v0[1], wzr
; CHECK-NEXT:    mov.h v0[3], wzr
; CHECK-NEXT:    mov.h v0[7], wzr
; CHECK-NEXT:    ret
  %v.0 = insertelement <8 x i16> <i16 undef, i16 0, i16 undef, i16 0, i16 undef, i16 undef, i16 undef, i16 0>, i16 %a, i32 0
  %v.2 = insertelement <8 x i16> %v.0, i16 %a, i32 2
  %v.3 = insertelement <8 x i16> %v.2, i16 %a, i32 4
  %v.4 = insertelement <8 x i16> %v.3, i16 %a, i32 5
  %v.5 = insertelement <8 x i16> %v.4, i16 %a, i32 6
  ret <8 x i16> %v.5
}

; <8 x i16> base has zeros in lanes 1, 3, 6 and 7; the four undef lanes are
; set to %a.
define <8 x i16> @test_insert_v8i16_insert_4(i16 %a) {
; CHECK-LABEL: test_insert_v8i16_insert_4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    mov.h v0[0], w0
; CHECK-NEXT:    mov.h v0[2], w0
; CHECK-NEXT:    mov.h v0[4], w0
; CHECK-NEXT:    mov.h v0[5], w0
; CHECK-NEXT:    ret
  %v.0 = insertelement <8 x i16> <i16 undef, i16 0, i16 undef, i16 0, i16 undef, i16 undef, i16 0, i16 0>, i16 %a, i32 0
  %v.2 = insertelement <8 x i16> %v.0, i16 %a, i32 2
  %v.3 = insertelement <8 x i16> %v.2, i16 %a, i32 4
  %v.4 = insertelement <8 x i16> %v.3, i16 %a, i32 5
  ret <8 x i16> %v.4
}

; <8 x i16> base has zeros in lanes 1, 2, 3, 6 and 7; the three undef lanes
; are set to %a.
define <8 x i16> @test_insert_v8i16_insert_5(i16 %a) {
; CHECK-LABEL: test_insert_v8i16_insert_5:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    mov.h v0[0], w0
; CHECK-NEXT:    mov.h v0[4], w0
; CHECK-NEXT:    mov.h v0[5], w0
; CHECK-NEXT:    ret
  %v.0 = insertelement <8 x i16> <i16 undef, i16 0, i16 0, i16 0, i16 undef, i16 undef, i16 0, i16 0>, i16 %a, i32 0
  %v.3 = insertelement <8 x i16> %v.0, i16 %a, i32 4
  %v.4 = insertelement <8 x i16> %v.3, i16 %a, i32 5
  ret <8 x i16> %v.4
}

; Insert %a into the undef lane 1 of <2 x float> <0.0, undef>.
define <2 x float> @test_insert_v2f32_undef_zero_vector(float %a) {
; CHECK-LABEL: test_insert_v2f32_undef_zero_vector:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d1, #0000000000000000
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT:    mov.s v1[1], v0[0]
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    ret
  %v.0 = insertelement <2 x float> <float 0.000000e+00, float undef>, float %a, i32 1
  ret <2 x float> %v.0
}

; <4 x float> base is undef except for 0.0 in lane 3; lanes 0-2 are set to %a.
define <4 x float> @test_insert_3_f32_undef_zero_vector(float %a) {
; CHECK-LABEL: test_insert_3_f32_undef_zero_vector:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT:    dup.4s v0, v0[0]
; CHECK-NEXT:    mov.s v0[3], wzr
; CHECK-NEXT:    ret
  %v.0 = insertelement <4 x float> <float undef, float undef, float undef, float 0.000000e+00>, float %a, i32 0
  %v.1 = insertelement <4 x float> %v.0, float %a, i32 1
  %v.2 = insertelement <4 x float> %v.1, float %a, i32 2
  ret <4 x float> %v.2
}

; Fully undef <4 x float> base; lanes 0-2 are set to %a and lane 3 stays undef.
define <4 x float> @test_insert_3_f32_undef(float %a) {
; CHECK-LABEL: test_insert_3_f32_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT:    dup.4s v0, v0[0]
; CHECK-NEXT:    ret
  %v.0 = insertelement <4 x float> <float undef, float undef, float undef, float undef>, float %a, i32 0
  %v.1 = insertelement <4 x float> %v.0, float %a, i32 1
  %v.2 = insertelement <4 x float> %v.1, float %a, i32 2
  ret <4 x float> %v.2
}

; <4 x float> base is <undef, 0.0, undef, 0.0>; lanes 0 and 2 are set to %a.
define <4 x float> @test_insert_2_f32_undef_zero(float %a) {
; CHECK-LABEL: test_insert_2_f32_undef_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi.2d v1, #0000000000000000
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT:    mov.s v1[0], v0[0]
; CHECK-NEXT:    mov.s v1[2], v0[0]
; CHECK-NEXT:    mov.16b v0, v1
; CHECK-NEXT:    ret
  %v.0 = insertelement <4 x float> <float undef, float 0.000000e+00, float undef, float 0.000000e+00>, float %a, i32 0
  %v.1 = insertelement <4 x float> %v.0, float %a, i32 2
  ret <4 x float> %v.1
}

; Insert %a into the undef lane 0 of <2 x double> <undef, 0.0>.
define <2 x double> @test_insert_v2f64_undef_insert1(double %a) {
; CHECK-LABEL: test_insert_v2f64_undef_insert1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi.2d v1, #0000000000000000
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov.d v1[0], v0[0]
; CHECK-NEXT:    mov.16b v0, v1
; CHECK-NEXT:    ret
  %v.0 = insertelement <2 x double > <double undef, double 0.000000e+00>, double %a, i32 0
  ret <2 x double> %v.0
}

; Insert %a into lanes 0 and 2 of the non-constant vector argument %b.
define <4 x float> @test_insert_2_f32_var(float %a, <4 x float> %b) {
; CHECK-LABEL: test_insert_2_f32_var:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT:    mov.s v1[0], v0[0]
; CHECK-NEXT:    mov.s v1[2], v0[0]
; CHECK-NEXT:    mov.16b v0, v1
; CHECK-NEXT:    ret
  %v.0 = insertelement <4 x float> %b, float %a, i32 0
  %v.1 = insertelement <4 x float> %v.0, float %a, i32 2
  ret <4 x float> %v.1
}

; Insert constant zero into lane 5 of the <8 x i16> argument.
define <8 x i16> @test_insert_v8i16_i16_zero(<8 x i16> %a) {
; CHECK-LABEL: test_insert_v8i16_i16_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov.h v0[5], wzr
; CHECK-NEXT:    ret
  %v.0 = insertelement <8 x i16> %a, i16 0, i32 5
  ret <8 x i16> %v.0
}

; TODO: This should just be a mov.s v0[3], wzr
; (review) The function below inserts 0.0 into lane 0 of a <4 x half>, which
; does not match the lane and element size named in the TODO above; the TODO
; may be stale or belong to a different test -- confirm before acting on it.
define <4 x half> @test_insert_v4f16_f16_zero(<4 x half> %a) {
; CHECK-LABEL: test_insert_v4f16_f16_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov.h v0[0], wzr
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %v.0 = insertelement <4 x half> %a, half 0.000000e+00, i32 0
  ret <4 x half> %v.0
}

; Insert 0.0 into lane 6 of the <8 x half> argument.
define <8 x half> @test_insert_v8f16_f16_zero(<8 x half> %a) {
; CHECK-LABEL: test_insert_v8f16_f16_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov.h v0[6], wzr
; CHECK-NEXT:    ret
  %v.0 = insertelement <8 x half> %a, half 0.000000e+00, i32 6
  ret <8 x half> %v.0
}

; Insert 0.0 into lane 0 of the <2 x float> argument.
define <2 x float> @test_insert_v2f32_f32_zero(<2 x float> %a) {
; CHECK-LABEL: test_insert_v2f32_f32_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov.s v0[0], wzr
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %v.0 = insertelement <2 x float> %a, float 0.000000e+00, i32 0
  ret <2 x float> %v.0
}

; Insert 0.0 into lane 3 of the <4 x float> argument.
define <4 x float> @test_insert_v4f32_f32_zero(<4 x float> %a) {
; CHECK-LABEL: test_insert_v4f32_f32_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov.s v0[3], wzr
; CHECK-NEXT:    ret
  %v.0 = insertelement <4 x float> %a, float 0.000000e+00, i32 3
  ret <4 x float> %v.0
}

; Insert 0.0 into lane 1 of the <2 x double> argument.
define <2 x double> @test_insert_v2f64_f64_zero(<2 x double> %a) {
; CHECK-LABEL: test_insert_v2f64_f64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov.d v0[1], xzr
; CHECK-NEXT:    ret
  %v.0 = insertelement <2 x double> %a, double 0.000000e+00, i32 1
  ret <2 x double> %v.0
}