; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=aarch64-none-eabi -o - %s | FileCheck %s

; Each test below extracts a strided subset of lanes with a shufflevector and
; then zero-extends the result. The expected assembly recorded with every
; function shows which AArch64 sequence (zip/uzp, and-mask, ushr, ushll) the
; backend currently selects for that lane pattern.
;
; Shuffles whose second operand is never selected by the mask use `poison`
; for that operand rather than the deprecated `undef`.

; --- <4 x i32> sources, two lanes extracted, zext i32 -> i64 ---

; Lanes 0 and 2 of %a (%b is not referenced by the mask).
define <2 x i64> @v2i64_02(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: v2i64_02:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT:    zip1 v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
; CHECK-NEXT:    ret
  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <2 x i32> <i32 0, i32 2>
  %d = zext <2 x i32> %c to <2 x i64>
  ret <2 x i64> %d
}

; Lanes 1 and 3 of %a (%b is not referenced by the mask).
define <2 x i64> @v2i64_13(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: v2i64_13:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT:    zip2 v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
; CHECK-NEXT:    ret
  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <2 x i32> <i32 1, i32 3>
  %d = zext <2 x i32> %c to <2 x i64>
  ret <2 x i64> %d
}

; Stride-4 pattern starting at 0. With only two result lanes the mask is
; <0, 4>, i.e. lane 0 of %a and lane 0 of %b.
define <2 x i64> @v2i64_04812(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: v2i64_04812:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
; CHECK-NEXT:    ret
  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <2 x i32> <i32 0, i32 4>
  %d = zext <2 x i32> %c to <2 x i64>
  ret <2 x i64> %d
}

; Stride-4 pattern starting at 1: mask <1, 5> (lane 1 of %a, lane 1 of %b).
define <2 x i64> @v2i64_15913(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: v2i64_15913:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
; CHECK-NEXT:    ret
  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <2 x i32> <i32 1, i32 5>
  %d = zext <2 x i32> %c to <2 x i64>
  ret <2 x i64> %d
}

; Stride-4 pattern starting at 2: mask <2, 6> (lane 2 of %a, lane 2 of %b).
define <2 x i64> @v2i64_261014(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: v2i64_261014:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT:    zip1 v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
; CHECK-NEXT:    ret
  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <2 x i32> <i32 2, i32 6>
  %d = zext <2 x i32> %c to <2 x i64>
  ret <2 x i64> %d
}

; Stride-4 pattern starting at 3: mask <3, 7> (lane 3 of %a, lane 3 of %b).
define <2 x i64> @v2i64_37(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: v2i64_37:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT:    zip2 v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
; CHECK-NEXT:    ret
  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <2 x i32> <i32 3, i32 7>
  %d = zext <2 x i32> %c to <2 x i64>
  ret <2 x i64> %d
}

; --- <16 x i16> source, every fourth lane, zext i16 -> i64 ---

; Lanes 0, 4, 8, 12.
define <4 x i64> @v2i64_i16_04812(<16 x i16> %a) {
; CHECK-LABEL: v2i64_i16_04812:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.2d, #0x00ffff0000ffff
; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    ushll2 v1.2d, v0.4s, #0
; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
; CHECK-NEXT:    ret
  %s1 = shufflevector <16 x i16> %a, <16 x i16> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  %z1 = zext <4 x i16> %s1 to <4 x i64>
  ret <4 x i64> %z1
}

; Lanes 1, 5, 9, 13.
define <4 x i64> @v2i64_i16_15913(<16 x i16> %a) {
; CHECK-LABEL: v2i64_i16_15913:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.2d, #0x0000000000ffff
; CHECK-NEXT:    ushr v0.2d, v0.2d, #16
; CHECK-NEXT:    ushr v1.2d, v1.2d, #16
; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %s1 = shufflevector <16 x i16> %a, <16 x i16> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
  %z1 = zext <4 x i16> %s1 to <4 x i64>
  ret <4 x i64> %z1
}

; Lanes 2, 6, 10, 14.
define <4 x i64> @v2i64_i16_261014(<16 x i16> %a) {
; CHECK-LABEL: v2i64_i16_261014:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.2d, #0x00ffff0000ffff
; CHECK-NEXT:    uzp2 v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    ushll2 v1.2d, v0.4s, #0
; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
; CHECK-NEXT:    ret
  %s1 = shufflevector <16 x i16> %a, <16 x i16> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
  %z1 = zext <4 x i16> %s1 to <4 x i64>
  ret <4 x i64> %z1
}

; Lanes 3, 7, 11, 15: the top i16 of each 64-bit element, so the expected
; code is a plain logical shift right by 48.
define <4 x i64> @v2i64_i16_371115(<16 x i16> %a) {
; CHECK-LABEL: v2i64_i16_371115:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushr v0.2d, v0.2d, #48
; CHECK-NEXT:    ushr v1.2d, v1.2d, #48
; CHECK-NEXT:    ret
  %s1 = shufflevector <16 x i16> %a, <16 x i16> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
  %z1 = zext <4 x i16> %s1 to <4 x i64>
  ret <4 x i64> %z1
}


; --- <8 x i16> sources, four lanes extracted, zext i16 -> i32 ---

; Even lanes of %a: expected to become a single and-mask.
define <4 x i32> @v4i32_0246(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: v4i32_0246:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.2d, #0x00ffff0000ffff
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %c = shufflevector <8 x i16> %a, <8 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %d = zext <4 x i16> %c to <4 x i32>
  ret <4 x i32> %d
}

; Odd lanes of %a: expected to become a single shift right by 16.
define <4 x i32> @v4i32_1357(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: v4i32_1357:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushr v0.4s, v0.4s, #16
; CHECK-NEXT:    ret
  %c = shufflevector <8 x i16> %a, <8 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %d = zext <4 x i16> %c to <4 x i32>
  ret <4 x i32> %d
}

; Every fourth lane across %a and %b, starting at 0.
define <4 x i32> @v4i32_04812(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: v4i32_04812:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.2d, #0x00ffff0000ffff
; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    ret
  %c = shufflevector <8 x i16> %a, <8 x i16> %b, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  %d = zext <4 x i16> %c to <4 x i32>
  ret <4 x i32> %d
}

; Every fourth lane across %a and %b, starting at 1.
define <4 x i32> @v4i32_15913(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: v4i32_15913:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ushr v0.4s, v0.4s, #16
; CHECK-NEXT:    ret
  %c = shufflevector <8 x i16> %a, <8 x i16> %b, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
  %d = zext <4 x i16> %c to <4 x i32>
  ret <4 x i32> %d
}
; Every fourth i16 lane across %a and %b, starting at 2, zext i16 -> i32.
define <4 x i32> @v4i32_261014(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: v4i32_261014:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.2d, #0x00ffff0000ffff
; CHECK-NEXT:    uzp2 v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    ret
  %c = shufflevector <8 x i16> %a, <8 x i16> %b, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
  %d = zext <4 x i16> %c to <4 x i32>
  ret <4 x i32> %d
}

; Every fourth i16 lane across %a and %b, starting at 3, zext i16 -> i32.
define <4 x i32> @v4i32_371115(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: v4i32_371115:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ushr v0.4s, v0.4s, #16
; CHECK-NEXT:    ret
  %c = shufflevector <8 x i16> %a, <8 x i16> %b, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
  %d = zext <4 x i16> %c to <4 x i32>
  ret <4 x i32> %d
}


; --- <16 x i8> sources, eight lanes extracted, zext i8 -> i16 ---

; Even bytes of %a: expected to become a single bic clearing the high byte
; of every halfword.
define <8 x i16> @v8i16_0246(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: v8i16_0246:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bic v0.8h, #255, lsl #8
; CHECK-NEXT:    ret
  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %d = zext <8 x i8> %c to <8 x i16>
  ret <8 x i16> %d
}

; Odd bytes of %a: expected to become a single shift right by 8.
define <8 x i16> @v8i16_1357(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: v8i16_1357:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushr v0.8h, v0.8h, #8
; CHECK-NEXT:    ret
  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %d = zext <8 x i8> %c to <8 x i16>
  ret <8 x i16> %d
}

; Every fourth byte across %a and %b, starting at 0.
define <8 x i16> @v8i16_04812(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: v8i16_04812:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    bic v0.8h, #255, lsl #8
; CHECK-NEXT:    ret
  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
  %d = zext <8 x i8> %c to <8 x i16>
  ret <8 x i16> %d
}

; Every fourth byte across %a and %b, starting at 1.
define <8 x i16> @v8i16_15913(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: v8i16_15913:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ushr v0.8h, v0.8h, #8
; CHECK-NEXT:    ret
  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
  %d = zext <8 x i8> %c to <8 x i16>
  ret <8 x i16> %d
}

; Every fourth byte across %a and %b, starting at 2.
define <8 x i16> @v8i16_261014(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: v8i16_261014:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    bic v0.8h, #255, lsl #8
; CHECK-NEXT:    ret
  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
  %d = zext <8 x i8> %c to <8 x i16>
  ret <8 x i16> %d
}

; Every fourth byte across %a and %b, starting at 3.
define <8 x i16> @v8i16_371115(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: v8i16_371115:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ushr v0.8h, v0.8h, #8
; CHECK-NEXT:    ret
  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
  %d = zext <8 x i8> %c to <8 x i16>
  ret <8 x i16> %d
}


; --- <16 x i8> sources, eight lanes extracted, zext i8 -> i32 ---
; The <8 x i32> result occupies two vector registers (v0 and v1 in the
; expected assembly).

; Even bytes of %a.
define <8 x i32> @v8i32_0246(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: v8i32_0246:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bic v0.8h, #255, lsl #8
; CHECK-NEXT:    ushll2 v1.4s, v0.8h, #0
; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-NEXT:    ret
  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %d = zext <8 x i8> %c to <8 x i32>
  ret <8 x i32> %d
}

; Odd bytes of %a.
define <8 x i32> @v8i32_1357(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: v8i32_1357:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushr v0.8h, v0.8h, #8
; CHECK-NEXT:    ushll2 v1.4s, v0.8h, #0
; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-NEXT:    ret
  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %d = zext <8 x i8> %c to <8 x i32>
  ret <8 x i32> %d
}

; Byte 0 of every 32-bit element of %a and %b: expected to be an and-mask
; on each input register.
define <8 x i32> @v8i32_04812(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: v8i32_04812:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.2d, #0x0000ff000000ff
; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
  %d = zext <8 x i8> %c to <8 x i32>
  ret <8 x i32> %d
}

; Byte 1 of every 32-bit element of %a and %b: shift right by 8, then mask.
define <8 x i32> @v8i32_15913(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: v8i32_15913:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.2d, #0x0000ff000000ff
; CHECK-NEXT:    ushr v0.4s, v0.4s, #8
; CHECK-NEXT:    ushr v1.4s, v1.4s, #8
; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
  %d = zext <8 x i8> %c to <8 x i32>
  ret <8 x i32> %d
}

; Byte 2 of every 32-bit element of %a and %b: shift right by 16, then
; clear the remaining high byte with bic.
define <8 x i32> @v8i32_261014(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: v8i32_261014:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushr v0.4s, v0.4s, #16
; CHECK-NEXT:    ushr v1.4s, v1.4s, #16
; CHECK-NEXT:    bic v0.4s, #255, lsl #8
; CHECK-NEXT:    bic v1.4s, #255, lsl #8
; CHECK-NEXT:    ret
  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
  %d = zext <8 x i8> %c to <8 x i32>
  ret <8 x i32> %d
}

; Byte 3 (the top byte) of every 32-bit element of %a and %b: a shift right
; by 24 alone is expected.
define <8 x i32> @v8i32_371115(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: v8i32_371115:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushr v0.4s, v0.4s, #24
; CHECK-NEXT:    ushr v1.4s, v1.4s, #24
; CHECK-NEXT:    ret
  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
  %d = zext <8 x i8> %c to <8 x i32>
  ret <8 x i32> %d
}


; --- Four stride-4 extracts of one <32 x i16> value, combined ---
; Each of the following tests takes all four stride-4 phases (starting at
; lanes 0, 1, 2 and 3) of the same <32 x i16> value, widens each phase and
; sums the four results. The second shufflevector operand is never selected
; by any mask, so it is written as `poison`.

; Source vector passed in registers; zext to i64 and integer add.
define <8 x i64> @zext_add(<32 x i16> %l) {
; CHECK-LABEL: zext_add:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v4.2d, #0x00ffff0000ffff
; CHECK-NEXT:    uzp1 v5.4s, v0.4s, v1.4s
; CHECK-NEXT:    uzp2 v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    uzp1 v1.4s, v2.4s, v3.4s
; CHECK-NEXT:    uzp2 v2.4s, v2.4s, v3.4s
; CHECK-NEXT:    and v3.16b, v5.16b, v4.16b
; CHECK-NEXT:    and v6.16b, v0.16b, v4.16b
; CHECK-NEXT:    and v7.16b, v1.16b, v4.16b
; CHECK-NEXT:    and v4.16b, v2.16b, v4.16b
; CHECK-NEXT:    usra v3.4s, v5.4s, #16
; CHECK-NEXT:    usra v6.4s, v0.4s, #16
; CHECK-NEXT:    usra v7.4s, v1.4s, #16
; CHECK-NEXT:    usra v4.4s, v2.4s, #16
; CHECK-NEXT:    uaddl v0.2d, v3.2s, v6.2s
; CHECK-NEXT:    uaddl2 v1.2d, v3.4s, v6.4s
; CHECK-NEXT:    uaddl2 v3.2d, v7.4s, v4.4s
; CHECK-NEXT:    uaddl v2.2d, v7.2s, v4.2s
; CHECK-NEXT:    ret
  %s1 = shufflevector <32 x i16> %l, <32 x i16> poison, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
  %z1 = zext <8 x i16> %s1 to <8 x i64>
  %s2 = shufflevector <32 x i16> %l, <32 x i16> poison, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
  %z2 = zext <8 x i16> %s2 to <8 x i64>
  %s3 = shufflevector <32 x i16> %l, <32 x i16> poison, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
  %z3 = zext <8 x i16> %s3 to <8 x i64>
  %s4 = shufflevector <32 x i16> %l, <32 x i16> poison, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
  %z4 = zext <8 x i16> %s4 to <8 x i64>
  %a = add <8 x i64> %z1, %z2
  %b = add <8 x i64> %z3, %z4
  %c = add <8 x i64> %a, %b
  ret <8 x i64> %c
}

; Same computation as zext_add, but the source vector is loaded from %p; the
; expected assembly uses a de-interleaving ld4 followed by widening adds.
define <8 x i64> @zext_load_add(ptr %p) {
; CHECK-LABEL: zext_load_add:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x0]
; CHECK-NEXT:    uaddl v4.4s, v0.4h, v1.4h
; CHECK-NEXT:    uaddl v5.4s, v2.4h, v3.4h
; CHECK-NEXT:    uaddl2 v6.4s, v0.8h, v1.8h
; CHECK-NEXT:    uaddl2 v2.4s, v2.8h, v3.8h
; CHECK-NEXT:    uaddl v0.2d, v4.2s, v5.2s
; CHECK-NEXT:    uaddl2 v1.2d, v4.4s, v5.4s
; CHECK-NEXT:    uaddl2 v3.2d, v6.4s, v2.4s
; CHECK-NEXT:    uaddl v2.2d, v6.2s, v2.2s
; CHECK-NEXT:    ret
  %l = load <32 x i16>, ptr %p
  %s1 = shufflevector <32 x i16> %l, <32 x i16> poison, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
  %z1 = zext <8 x i16> %s1 to <8 x i64>
  %s2 = shufflevector <32 x i16> %l, <32 x i16> poison, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
  %z2 = zext <8 x i16> %s2 to <8 x i64>
  %s3 = shufflevector <32 x i16> %l, <32 x i16> poison, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
  %z3 = zext <8 x i16> %s3 to <8 x i64>
  %s4 = shufflevector <32 x i16> %l, <32 x i16> poison, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
  %z4 = zext <8 x i16> %s4 to <8 x i64>
  %a = add <8 x i64> %z1, %z2
  %b = add <8 x i64> %z3, %z4
  %c = add <8 x i64> %a, %b
  ret <8 x i64> %c
}

; Source vector passed in registers; uitofp to double and fadd. The expected
; assembly isolates each i16 with ushr/and on 64-bit lanes before ucvtf.
define <8 x double> @uitofp_fadd(<32 x i16> %l) {
; CHECK-LABEL: uitofp_fadd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v4.2d, #0x0000000000ffff
; CHECK-NEXT:    ushr v5.2d, v0.2d, #16
; CHECK-NEXT:    ushr v6.2d, v1.2d, #16
; CHECK-NEXT:    ushr v7.2d, v2.2d, #16
; CHECK-NEXT:    ushr v17.2d, v3.2d, #16
; CHECK-NEXT:    ushr v20.2d, v0.2d, #32
; CHECK-NEXT:    ushr v22.2d, v1.2d, #32
; CHECK-NEXT:    ushr v23.2d, v2.2d, #32
; CHECK-NEXT:    ushr v24.2d, v3.2d, #32
; CHECK-NEXT:    and v16.16b, v0.16b, v4.16b
; CHECK-NEXT:    and v18.16b, v1.16b, v4.16b
; CHECK-NEXT:    and v19.16b, v2.16b, v4.16b
; CHECK-NEXT:    and v21.16b, v3.16b, v4.16b
; CHECK-NEXT:    and v5.16b, v5.16b, v4.16b
; CHECK-NEXT:    and v6.16b, v6.16b, v4.16b
; CHECK-NEXT:    and v7.16b, v7.16b, v4.16b
; CHECK-NEXT:    and v17.16b, v17.16b, v4.16b
; CHECK-NEXT:    and v20.16b, v20.16b, v4.16b
; CHECK-NEXT:    and v22.16b, v22.16b, v4.16b
; CHECK-NEXT:    and v23.16b, v23.16b, v4.16b
; CHECK-NEXT:    and v4.16b, v24.16b, v4.16b
; CHECK-NEXT:    ushr v0.2d, v0.2d, #48
; CHECK-NEXT:    ushr v1.2d, v1.2d, #48
; CHECK-NEXT:    ushr v2.2d, v2.2d, #48
; CHECK-NEXT:    ushr v3.2d, v3.2d, #48
; CHECK-NEXT:    ucvtf v16.2d, v16.2d
; CHECK-NEXT:    ucvtf v18.2d, v18.2d
; CHECK-NEXT:    ucvtf v19.2d, v19.2d
; CHECK-NEXT:    ucvtf v21.2d, v21.2d
; CHECK-NEXT:    ucvtf v5.2d, v5.2d
; CHECK-NEXT:    ucvtf v6.2d, v6.2d
; CHECK-NEXT:    ucvtf v7.2d, v7.2d
; CHECK-NEXT:    ucvtf v17.2d, v17.2d
; CHECK-NEXT:    ucvtf v20.2d, v20.2d
; CHECK-NEXT:    ucvtf v22.2d, v22.2d
; CHECK-NEXT:    ucvtf v23.2d, v23.2d
; CHECK-NEXT:    ucvtf v4.2d, v4.2d
; CHECK-NEXT:    ucvtf v0.2d, v0.2d
; CHECK-NEXT:    ucvtf v1.2d, v1.2d
; CHECK-NEXT:    ucvtf v2.2d, v2.2d
; CHECK-NEXT:    ucvtf v3.2d, v3.2d
; CHECK-NEXT:    fadd v5.2d, v16.2d, v5.2d
; CHECK-NEXT:    fadd v17.2d, v21.2d, v17.2d
; CHECK-NEXT:    fadd v7.2d, v19.2d, v7.2d
; CHECK-NEXT:    fadd v6.2d, v18.2d, v6.2d
; CHECK-NEXT:    fadd v0.2d, v20.2d, v0.2d
; CHECK-NEXT:    fadd v1.2d, v22.2d, v1.2d
; CHECK-NEXT:    fadd v3.2d, v4.2d, v3.2d
; CHECK-NEXT:    fadd v2.2d, v23.2d, v2.2d
; CHECK-NEXT:    fadd v0.2d, v5.2d, v0.2d
; CHECK-NEXT:    fadd v1.2d, v6.2d, v1.2d
; CHECK-NEXT:    fadd v2.2d, v7.2d, v2.2d
; CHECK-NEXT:    fadd v3.2d, v17.2d, v3.2d
; CHECK-NEXT:    ret
  %s1 = shufflevector <32 x i16> %l, <32 x i16> poison, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
  %z1 = uitofp <8 x i16> %s1 to <8 x double>
  %s2 = shufflevector <32 x i16> %l, <32 x i16> poison, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
  %z2 = uitofp <8 x i16> %s2 to <8 x double>
  %s3 = shufflevector <32 x i16> %l, <32 x i16> poison, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
  %z3 = uitofp <8 x i16> %s3 to <8 x double>
  %s4 = shufflevector <32 x i16> %l, <32 x i16> poison, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
  %z4 = uitofp <8 x i16> %s4 to <8 x double>
  %a = fadd <8 x double> %z1, %z2
  %b = fadd <8 x double> %z3, %z4
  %c = fadd <8 x double> %a, %b
  ret <8 x double> %c
}

; Same computation as uitofp_fadd, but the source vector is loaded from %p.
; The expected assembly loads with two ldp instructions (no ld4 here) and
; then uses the same ushr/and/ucvtf/fadd shape.
define <8 x double> @uitofp_load_fadd(ptr %p) {
; CHECK-LABEL: uitofp_load_fadd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q1, q2, [x0]
; CHECK-NEXT:    movi v0.2d, #0x0000000000ffff
; CHECK-NEXT:    ldp q3, q4, [x0, #32]
; CHECK-NEXT:    ushr v5.2d, v1.2d, #16
; CHECK-NEXT:    ushr v6.2d, v2.2d, #16
; CHECK-NEXT:    ushr v20.2d, v1.2d, #32
; CHECK-NEXT:    ushr v7.2d, v3.2d, #16
; CHECK-NEXT:    ushr v17.2d, v4.2d, #16
; CHECK-NEXT:    ushr v22.2d, v2.2d, #32
; CHECK-NEXT:    ushr v23.2d, v3.2d, #32
; CHECK-NEXT:    ushr v24.2d, v4.2d, #32
; CHECK-NEXT:    and v16.16b, v1.16b, v0.16b
; CHECK-NEXT:    and v18.16b, v2.16b, v0.16b
; CHECK-NEXT:    and v19.16b, v3.16b, v0.16b
; CHECK-NEXT:    and v21.16b, v4.16b, v0.16b
; CHECK-NEXT:    and v5.16b, v5.16b, v0.16b
; CHECK-NEXT:    and v6.16b, v6.16b, v0.16b
; CHECK-NEXT:    and v7.16b, v7.16b, v0.16b
; CHECK-NEXT:    and v17.16b, v17.16b, v0.16b
; CHECK-NEXT:    and v20.16b, v20.16b, v0.16b
; CHECK-NEXT:    and v22.16b, v22.16b, v0.16b
; CHECK-NEXT:    and v23.16b, v23.16b, v0.16b
; CHECK-NEXT:    and v0.16b, v24.16b, v0.16b
; CHECK-NEXT:    ushr v1.2d, v1.2d, #48
; CHECK-NEXT:    ushr v2.2d, v2.2d, #48
; CHECK-NEXT:    ushr v3.2d, v3.2d, #48
; CHECK-NEXT:    ushr v4.2d, v4.2d, #48
; CHECK-NEXT:    ucvtf v16.2d, v16.2d
; CHECK-NEXT:    ucvtf v18.2d, v18.2d
; CHECK-NEXT:    ucvtf v19.2d, v19.2d
; CHECK-NEXT:    ucvtf v21.2d, v21.2d
; CHECK-NEXT:    ucvtf v5.2d, v5.2d
; CHECK-NEXT:    ucvtf v6.2d, v6.2d
; CHECK-NEXT:    ucvtf v7.2d, v7.2d
; CHECK-NEXT:    ucvtf v17.2d, v17.2d
; CHECK-NEXT:    ucvtf v20.2d, v20.2d
; CHECK-NEXT:    ucvtf v22.2d, v22.2d
; CHECK-NEXT:    ucvtf v23.2d, v23.2d
; CHECK-NEXT:    ucvtf v0.2d, v0.2d
; CHECK-NEXT:    ucvtf v1.2d, v1.2d
; CHECK-NEXT:    ucvtf v2.2d, v2.2d
; CHECK-NEXT:    ucvtf v3.2d, v3.2d
; CHECK-NEXT:    ucvtf v4.2d, v4.2d
; CHECK-NEXT:    fadd v6.2d, v18.2d, v6.2d
; CHECK-NEXT:    fadd v5.2d, v16.2d, v5.2d
; CHECK-NEXT:    fadd v17.2d, v21.2d, v17.2d
; CHECK-NEXT:    fadd v7.2d, v19.2d, v7.2d
; CHECK-NEXT:    fadd v1.2d, v20.2d, v1.2d
; CHECK-NEXT:    fadd v3.2d, v23.2d, v3.2d
; CHECK-NEXT:    fadd v2.2d, v22.2d, v2.2d
; CHECK-NEXT:    fadd v4.2d, v0.2d, v4.2d
; CHECK-NEXT:    fadd v0.2d, v5.2d, v1.2d
; CHECK-NEXT:    fadd v1.2d, v6.2d, v2.2d
; CHECK-NEXT:    fadd v2.2d, v7.2d, v3.2d
; CHECK-NEXT:    fadd v3.2d, v17.2d, v4.2d
; CHECK-NEXT:    ret
  %l = load <32 x i16>, ptr %p
  %s1 = shufflevector <32 x i16> %l, <32 x i16> poison, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
  %z1 = uitofp <8 x i16> %s1 to <8 x double>
  %s2 = shufflevector <32 x i16> %l, <32 x i16> poison, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
  %z2 = uitofp <8 x i16> %s2 to <8 x double>
  %s3 = shufflevector <32 x i16> %l, <32 x i16> poison, <8 x i32> <i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30>
  %z3 = uitofp <8 x i16> %s3 to <8 x double>
  %s4 = shufflevector <32 x i16> %l, <32 x i16> poison, <8 x i32> <i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
  %z4 = uitofp <8 x i16> %s4 to <8 x double>
  %a = fadd <8 x double> %z1, %z2
  %b = fadd <8 x double> %z3, %z4
  %c = fadd <8 x double> %a, %b
  ret <8 x double> %c
}