; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -mtriple=thumbv8.1m.main-none-eabihf -mattr=+mve.fp | FileCheck %s --check-prefix=CHECKLE
; RUN: llc < %s -mtriple=thumbebv8.1m.main-none-eabihf -mattr=+mve.fp | FileCheck %s --check-prefix=CHECKBE

; Every function below has the same shape: a vector load and a scalar load
; from neighbouring addresses off %p, a shufflevector that slides the vector
; lanes by one position (leaving one end lane undef), and an insertelement that
; writes the scalar into that free lane. The "_first" variants put the scalar
; in lane 0, the "_last" variants put it in the highest lane.
; The file is run twice, once for a thumbv8.1m.main triple and once for a
; thumbebv8.1m.main triple, both with +mve.fp; each run has its own check
; prefix.
; NOTE(review): the placeholder operands and mask lanes use `undef`; if they
; are ever migrated to `poison`, regenerate the assertions with the script
; named on the first line rather than editing them by hand.

; <8 x i8> loaded from %p+1, i8 loaded from %p, scalar inserted into lane 0.
; Expected output is a single vldrb.u16 from [r0]; the big-endian run
; additionally expects a vrev64.16.
define <8 x i8> @inserti8_first(ptr %p) {
; CHECKLE-LABEL: inserti8_first:
; CHECKLE:       @ %bb.0:
; CHECKLE-NEXT:    vldrb.u16 q0, [r0]
; CHECKLE-NEXT:    bx lr
;
; CHECKBE-LABEL: inserti8_first:
; CHECKBE:       @ %bb.0:
; CHECKBE-NEXT:    vldrb.u16 q1, [r0]
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
  %q = getelementptr inbounds i8, ptr %p, i32 1
  %l1 = load <8 x i8>, ptr %q
  %l2 = load i8, ptr %p
  %s = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %ins = insertelement <8 x i8> %s, i8 %l2, i32 0
  ret <8 x i8> %ins
}

; <8 x i8> loaded from %p, i8 loaded from %p+8, scalar inserted into lane 7.
; Expected output is a single vldrb.u16 from [r0, #1]; the big-endian run
; additionally expects a vrev64.16.
define <8 x i8> @inserti8_last(ptr %p) {
; CHECKLE-LABEL: inserti8_last:
; CHECKLE:       @ %bb.0:
; CHECKLE-NEXT:    vldrb.u16 q0, [r0, #1]
; CHECKLE-NEXT:    bx lr
;
; CHECKBE-LABEL: inserti8_last:
; CHECKBE:       @ %bb.0:
; CHECKBE-NEXT:    vldrb.u16 q1, [r0, #1]
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
  %q = getelementptr inbounds i8, ptr %p, i32 8
  %l1 = load <8 x i8>, ptr %p
  %l2 = load i8, ptr %q
  %s = shufflevector <8 x i8> %l1, <8 x i8> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
  %ins = insertelement <8 x i8> %s, i8 %l2, i32 7
  ret <8 x i8> %ins
}

; As inserti8_first, but both the vector and the scalar are sign-extended to
; i16 before the shuffle/insert. Expected output is a single vldrb.s16 from
; [r0] (plus vrev64.16 in the big-endian run).
define <8 x i16> @inserti8_first_sext(ptr %p) {
; CHECKLE-LABEL: inserti8_first_sext:
; CHECKLE:       @ %bb.0:
; CHECKLE-NEXT:    vldrb.s16 q0, [r0]
; CHECKLE-NEXT:    bx lr
;
; CHECKBE-LABEL: inserti8_first_sext:
; CHECKBE:       @ %bb.0:
; CHECKBE-NEXT:    vldrb.s16 q1, [r0]
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
  %q = getelementptr inbounds i8, ptr %p, i32 1
  %l1 = load <8 x i8>, ptr %q
  %s1 = sext <8 x i8> %l1 to <8 x i16>
  %l2 = load i8, ptr %p
  %s2 = sext i8 %l2 to i16
  %s = shufflevector <8 x i16> %s1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %ins = insertelement <8 x i16> %s, i16 %s2, i32 0
  ret <8 x i16> %ins
}

; As inserti8_last, but both the vector and the scalar are sign-extended to
; i16 before the shuffle/insert. Expected output is a single vldrb.s16 from
; [r0, #1] (plus vrev64.16 in the big-endian run).
define <8 x i16> @inserti8_last_sext(ptr %p) {
; CHECKLE-LABEL: inserti8_last_sext:
; CHECKLE:       @ %bb.0:
; CHECKLE-NEXT:    vldrb.s16 q0, [r0, #1]
; CHECKLE-NEXT:    bx lr
;
; CHECKBE-LABEL: inserti8_last_sext:
; CHECKBE:       @ %bb.0:
; CHECKBE-NEXT:    vldrb.s16 q1, [r0, #1]
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
  %q = getelementptr inbounds i8, ptr %p, i32 8
  %l1 = load <8 x i8>, ptr %p
  %s1 = sext <8 x i8> %l1 to <8 x i16>
  %l2 = load i8, ptr %q
  %s2 = sext i8 %l2 to i16
  %s = shufflevector <8 x i16> %s1, <8 x i16> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
  %ins = insertelement <8 x i16> %s, i16 %s2, i32 7
  ret <8 x i16> %ins
}

; Zero-extending counterpart of inserti8_first_sext. Expected output is a
; single vldrb.u16 from [r0] (plus vrev64.16 in the big-endian run).
define <8 x i16> @inserti8_first_zext(ptr %p) {
; CHECKLE-LABEL: inserti8_first_zext:
; CHECKLE:       @ %bb.0:
; CHECKLE-NEXT:    vldrb.u16 q0, [r0]
; CHECKLE-NEXT:    bx lr
;
; CHECKBE-LABEL: inserti8_first_zext:
; CHECKBE:       @ %bb.0:
; CHECKBE-NEXT:    vldrb.u16 q1, [r0]
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
  %q = getelementptr inbounds i8, ptr %p, i32 1
  %l1 = load <8 x i8>, ptr %q
  %s1 = zext <8 x i8> %l1 to <8 x i16>
  %l2 = load i8, ptr %p
  %s2 = zext i8 %l2 to i16
  %s = shufflevector <8 x i16> %s1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %ins = insertelement <8 x i16> %s, i16 %s2, i32 0
  ret <8 x i16> %ins
}

; Zero-extending counterpart of inserti8_last_sext. Expected output is a
; single vldrb.u16 from [r0, #1] (plus vrev64.16 in the big-endian run).
define <8 x i16> @inserti8_last_zext(ptr %p) {
; CHECKLE-LABEL: inserti8_last_zext:
; CHECKLE:       @ %bb.0:
; CHECKLE-NEXT:    vldrb.u16 q0, [r0, #1]
; CHECKLE-NEXT:    bx lr
;
; CHECKBE-LABEL: inserti8_last_zext:
; CHECKBE:       @ %bb.0:
; CHECKBE-NEXT:    vldrb.u16 q1, [r0, #1]
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
  %q = getelementptr inbounds i8, ptr %p, i32 8
  %l1 = load <8 x i8>, ptr %p
  %s1 = zext <8 x i8> %l1 to <8 x i16>
  %l2 = load i8, ptr %q
  %s2 = zext i8 %l2 to i16
  %s = shufflevector <8 x i16> %s1, <8 x i16> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
  %ins = insertelement <8 x i16> %s, i16 %s2, i32 7
  ret <8 x i16> %ins
}

; <8 x i32> loaded from %p+4, i32 loaded from %p, scalar inserted into lane 0.
; The result is returned in two q registers. The recorded output is not two
; plain contiguous loads: it uses vector loads at [r0] and [r0, #20], a scalar
; vldr from [r0, #16], and vmov.f32 lane moves to assemble the second half.
define <8 x i32> @inserti32_first(ptr %p) {
; CHECKLE-LABEL: inserti32_first:
; CHECKLE:       @ %bb.0:
; CHECKLE-NEXT:    vldrw.u32 q2, [r0, #20]
; CHECKLE-NEXT:    vldr s4, [r0, #16]
; CHECKLE-NEXT:    vldrw.u32 q0, [r0]
; CHECKLE-NEXT:    vmov.f32 s5, s8
; CHECKLE-NEXT:    vmov.f32 s6, s9
; CHECKLE-NEXT:    vmov.f32 s7, s10
; CHECKLE-NEXT:    bx lr
;
; CHECKBE-LABEL: inserti32_first:
; CHECKBE:       @ %bb.0:
; CHECKBE-NEXT:    vldrw.u32 q3, [r0, #20]
; CHECKBE-NEXT:    vldrb.u8 q1, [r0]
; CHECKBE-NEXT:    vldr s8, [r0, #16]
; CHECKBE-NEXT:    vmov.f32 s9, s12
; CHECKBE-NEXT:    vrev64.8 q0, q1
; CHECKBE-NEXT:    vmov.f32 s10, s13
; CHECKBE-NEXT:    vmov.f32 s11, s14
; CHECKBE-NEXT:    vrev64.32 q1, q2
; CHECKBE-NEXT:    bx lr
  %q = getelementptr inbounds i8, ptr %p, i32 4
  %l1 = load <8 x i32>, ptr %q
  %l2 = load i32, ptr %p
  %s = shufflevector <8 x i32> %l1, <8 x i32> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %ins = insertelement <8 x i32> %s, i32 %l2, i32 0
  ret <8 x i32> %ins
}

; <8 x i32> loaded from %p, i32 loaded from %p+32, scalar inserted into lane 7.
; The recorded output uses vector loads at [r0] and [r0, #20], a scalar vldr
; from [r0, #16] into the top lane of the first half, and vmov.f32 lane moves
; for the remaining lanes of that half.
define <8 x i32> @inserti32_last(ptr %p) {
; CHECKLE-LABEL: inserti32_last:
; CHECKLE:       @ %bb.0:
; CHECKLE-NEXT:    vldrw.u32 q2, [r0]
; CHECKLE-NEXT:    vldr s3, [r0, #16]
; CHECKLE-NEXT:    vldrw.u32 q1, [r0, #20]
; CHECKLE-NEXT:    vmov.f32 s0, s9
; CHECKLE-NEXT:    vmov.f32 s1, s10
; CHECKLE-NEXT:    vmov.f32 s2, s11
; CHECKLE-NEXT:    bx lr
;
; CHECKBE-LABEL: inserti32_last:
; CHECKBE:       @ %bb.0:
; CHECKBE-NEXT:    vldrw.u32 q3, [r0]
; CHECKBE-NEXT:    vldrb.u8 q0, [r0, #20]
; CHECKBE-NEXT:    vldr s11, [r0, #16]
; CHECKBE-NEXT:    vmov.f32 s8, s13
; CHECKBE-NEXT:    vrev64.8 q1, q0
; CHECKBE-NEXT:    vmov.f32 s9, s14
; CHECKBE-NEXT:    vmov.f32 s10, s15
; CHECKBE-NEXT:    vrev64.32 q0, q2
; CHECKBE-NEXT:    bx lr
  %q = getelementptr inbounds i8, ptr %p, i32 32
  %l1 = load <8 x i32>, ptr %p
  %l2 = load i32, ptr %q
  %s = shufflevector <8 x i32> %l1, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
  %ins = insertelement <8 x i32> %s, i32 %l2, i32 7
  ret <8 x i32> %ins
}

; Same IR as inserti32_first, except that the original vector load %l1 has a
; second use: it is added to the shuffled/inserted vector and the sum is
; returned. The recorded output loads from [r0, #20], [r0, #4] and [r0] and
; performs two vadd.i32 (with vrev64.32 on each half in the big-endian run).
define <8 x i32> @inserti32_first_multiuse(ptr %p) {
; CHECKLE-LABEL: inserti32_first_multiuse:
; CHECKLE:       @ %bb.0:
; CHECKLE-NEXT:    vldrw.u32 q0, [r0, #20]
; CHECKLE-NEXT:    vldrw.u32 q2, [r0, #4]
; CHECKLE-NEXT:    vmov.f32 s4, s11
; CHECKLE-NEXT:    vmov.f32 s5, s0
; CHECKLE-NEXT:    vmov.f32 s6, s1
; CHECKLE-NEXT:    vmov.f32 s7, s2
; CHECKLE-NEXT:    vadd.i32 q1, q0, q1
; CHECKLE-NEXT:    vldrw.u32 q0, [r0]
; CHECKLE-NEXT:    vadd.i32 q0, q2, q0
; CHECKLE-NEXT:    bx lr
;
; CHECKBE-LABEL: inserti32_first_multiuse:
; CHECKBE:       @ %bb.0:
; CHECKBE-NEXT:    vldrw.u32 q0, [r0, #20]
; CHECKBE-NEXT:    vldrw.u32 q2, [r0, #4]
; CHECKBE-NEXT:    vmov.f32 s4, s11
; CHECKBE-NEXT:    vmov.f32 s5, s0
; CHECKBE-NEXT:    vmov.f32 s6, s1
; CHECKBE-NEXT:    vmov.f32 s7, s2
; CHECKBE-NEXT:    vadd.i32 q0, q0, q1
; CHECKBE-NEXT:    vrev64.32 q1, q0
; CHECKBE-NEXT:    vldrw.u32 q0, [r0]
; CHECKBE-NEXT:    vadd.i32 q2, q2, q0
; CHECKBE-NEXT:    vrev64.32 q0, q2
; CHECKBE-NEXT:    bx lr
  %q = getelementptr inbounds i8, ptr %p, i32 4
  %l1 = load <8 x i32>, ptr %q
  %l2 = load i32, ptr %p
  %s = shufflevector <8 x i32> %l1, <8 x i32> undef, <8 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %ins = insertelement <8 x i32> %s, i32 %l2, i32 0
  %a = add <8 x i32> %l1, %ins
  ret <8 x i32> %a
}

; Same IR as inserti32_last, except that the original vector load %l1 has a
; second use: it is added to the shuffled/inserted vector and the sum is
; returned. The recorded output loads from [r0], [r0, #16] and [r0, #20] and
; performs two vadd.i32 (with vrev64.32 on each half in the big-endian run).
define <8 x i32> @inserti32_last_multiuse(ptr %p) {
; CHECKLE-LABEL: inserti32_last_multiuse:
; CHECKLE:       @ %bb.0:
; CHECKLE-NEXT:    vldrw.u32 q0, [r0]
; CHECKLE-NEXT:    vldrw.u32 q1, [r0, #16]
; CHECKLE-NEXT:    vmov.f32 s8, s1
; CHECKLE-NEXT:    vmov.f32 s9, s2
; CHECKLE-NEXT:    vmov.f32 s10, s3
; CHECKLE-NEXT:    vmov.f32 s11, s4
; CHECKLE-NEXT:    vadd.i32 q0, q0, q2
; CHECKLE-NEXT:    vldrw.u32 q2, [r0, #20]
; CHECKLE-NEXT:    vadd.i32 q1, q1, q2
; CHECKLE-NEXT:    bx lr
;
; CHECKBE-LABEL: inserti32_last_multiuse:
; CHECKBE:       @ %bb.0:
; CHECKBE-NEXT:    vldrw.u32 q0, [r0]
; CHECKBE-NEXT:    vldrw.u32 q1, [r0, #16]
; CHECKBE-NEXT:    vmov.f32 s8, s1
; CHECKBE-NEXT:    vmov.f32 s9, s2
; CHECKBE-NEXT:    vmov.f32 s10, s3
; CHECKBE-NEXT:    vmov.f32 s11, s4
; CHECKBE-NEXT:    vadd.i32 q2, q0, q2
; CHECKBE-NEXT:    vrev64.32 q0, q2
; CHECKBE-NEXT:    vldrw.u32 q2, [r0, #20]
; CHECKBE-NEXT:    vadd.i32 q2, q1, q2
; CHECKBE-NEXT:    vrev64.32 q1, q2
; CHECKBE-NEXT:    bx lr
  %q = getelementptr inbounds i8, ptr %p, i32 32
  %l1 = load <8 x i32>, ptr %p
  %l2 = load i32, ptr %q
  %s = shufflevector <8 x i32> %l1, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
  %ins = insertelement <8 x i32> %s, i32 %l2, i32 7
  %a = add <8 x i32> %l1, %ins
  ret <8 x i32> %a
}

; <4 x float> loaded from %p+4, float loaded from %p, scalar inserted into
; lane 0. Expected output is a single vldrw.u32 from [r0]; the big-endian run
; expects vldrb.u8 from [r0] followed by vrev64.8.
define <4 x float> @insertf32_first(ptr %p) {
; CHECKLE-LABEL: insertf32_first:
; CHECKLE:       @ %bb.0:
; CHECKLE-NEXT:    vldrw.u32 q0, [r0]
; CHECKLE-NEXT:    bx lr
;
; CHECKBE-LABEL: insertf32_first:
; CHECKBE:       @ %bb.0:
; CHECKBE-NEXT:    vldrb.u8 q1, [r0]
; CHECKBE-NEXT:    vrev64.8 q0, q1
; CHECKBE-NEXT:    bx lr
  %q = getelementptr inbounds i8, ptr %p, i32 4
  %l1 = load <4 x float>, ptr %q
  %l2 = load float, ptr %p
  %s = shufflevector <4 x float> %l1, <4 x float> undef, <4 x i32> <i32 undef, i32 0, i32 1, i32 2>
  %ins = insertelement <4 x float> %s, float %l2, i32 0
  ret <4 x float> %ins
}

; <4 x float> loaded from %p, float loaded from %p+16, scalar inserted into
; lane 3. Expected output is a single vldrw.u32 from [r0, #4]; the big-endian
; run expects vldrb.u8 from [r0, #4] followed by vrev64.8.
define <4 x float> @insertf32_last(ptr %p) {
; CHECKLE-LABEL: insertf32_last:
; CHECKLE:       @ %bb.0:
; CHECKLE-NEXT:    vldrw.u32 q0, [r0, #4]
; CHECKLE-NEXT:    bx lr
;
; CHECKBE-LABEL: insertf32_last:
; CHECKBE:       @ %bb.0:
; CHECKBE-NEXT:    vldrb.u8 q1, [r0, #4]
; CHECKBE-NEXT:    vrev64.8 q0, q1
; CHECKBE-NEXT:    bx lr
  %q = getelementptr inbounds i8, ptr %p, i32 16
  %l1 = load <4 x float>, ptr %p
  %l2 = load float, ptr %q
  %s = shufflevector <4 x float> %l1, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 undef>
  %ins = insertelement <4 x float> %s, float %l2, i32 3
  ret <4 x float> %ins
}