; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64 < %s | FileCheck %s

; Tests that averaging patterns on widened operands narrow to the NEON
; halving-add instructions:
;   (x & y) + ((x ^ y) >> 1)  ->  [u|s]hadd   (floor average)
;   (x | y) - ((x ^ y) >> 1)  ->  [u|s]rhadd  (ceil average)
; and that (add nuw/nsw x, y) >> 1 (plus-one variants for rounding) folds to
; the same instructions. The *_mismatch tests check the cases where a missing
; flag, a shift amount != 1, or differing source widths block (or survive)
; the fold.

; Floor average of two zext'd i8 vectors: folds to uhadd in the narrow type,
; then the result is re-widened with ushll/ushll2.
define <16 x i16> @zext_avgflooru(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: zext_avgflooru:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhadd v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ushll2 v1.8h, v0.16b, #0
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    ret
  %x0 = zext <16 x i8> %a0 to <16 x i16>
  %x1 = zext <16 x i8> %a1 to <16 x i16>
  %and = and <16 x i16> %x0, %x1
  %xor = xor <16 x i16> %x0, %x1
  %shift = lshr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %avg = add <16 x i16> %and, %shift
  ret <16 x i16> %avg
}

; Mixed source widths (i8 vs i4): the i4 side is masked with #15, after which
; the uhadd fold still applies at i8.
define <16 x i16> @zext_avgflooru_mismatch(<16 x i8> %a0, <16 x i4> %a1) {
; CHECK-LABEL: zext_avgflooru_mismatch:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.16b, #15
; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
; CHECK-NEXT:    uhadd v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ushll2 v1.8h, v0.16b, #0
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    ret
  %x0 = zext <16 x i8> %a0 to <16 x i16>
  %x1 = zext <16 x i4> %a1 to <16 x i16>
  %and = and <16 x i16> %x0, %x1
  %xor = xor <16 x i16> %x0, %x1
  %shift = lshr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %avg = add <16 x i16> %and, %shift
  ret <16 x i16> %avg
}

; Ceil average of two zext'd i8 vectors: (x | y) - ((x ^ y) >> 1) folds to
; urhadd in the narrow type.
define <16 x i16> @zext_avgceilu(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: zext_avgceilu:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urhadd v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ushll2 v1.8h, v0.16b, #0
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    ret
  %x0 = zext <16 x i8> %a0 to <16 x i16>
  %x1 = zext <16 x i8> %a1 to <16 x i16>
  %or = or <16 x i16> %x0, %x1
  %xor = xor <16 x i16> %x0, %x1
  %shift = lshr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %avg = sub <16 x i16> %or, %shift
  ret <16 x i16> %avg
}

; Mixed source widths (i4 vs i8) for the ceil case: mask then urhadd at i8.
define <16 x i16> @zext_avgceilu_mismatch(<16 x i4> %a0, <16 x i8> %a1) {
; CHECK-LABEL: zext_avgceilu_mismatch:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.16b, #15
; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    urhadd v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ushll2 v1.8h, v0.16b, #0
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    ret
  %x0 = zext <16 x i4> %a0 to <16 x i16>
  %x1 = zext <16 x i8> %a1 to <16 x i16>
  %or = or <16 x i16> %x0, %x1
  %xor = xor <16 x i16> %x0, %x1
  %shift = lshr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %avg = sub <16 x i16> %or, %shift
  ret <16 x i16> %avg
}

; Signed floor average (sext + ashr): folds to shadd in the narrow type.
define <16 x i16> @sext_avgfloors(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: sext_avgfloors:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shadd v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    sshll2 v1.8h, v0.16b, #0
; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-NEXT:    ret
  %x0 = sext <16 x i8> %a0 to <16 x i16>
  %x1 = sext <16 x i8> %a1 to <16 x i16>
  %and = and <16 x i16> %x0, %x1
  %xor = xor <16 x i16> %x0, %x1
  %shift = ashr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %avg = add <16 x i16> %and, %shift
  ret <16 x i16> %avg
}

; Signed floor average with mixed widths (i8 vs i4): the i4 side is
; sign-extended via shl/sshr #12, and the shadd only forms at the wide (8h)
; type — no narrow fold.
define <16 x i16> @sext_avgfloors_mismatch(<16 x i8> %a0, <16 x i4> %a1) {
; CHECK-LABEL: sext_avgfloors_mismatch:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushll2 v2.8h, v1.16b, #0
; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
; CHECK-NEXT:    sshll v3.8h, v0.8b, #0
; CHECK-NEXT:    sshll2 v0.8h, v0.16b, #0
; CHECK-NEXT:    shl v1.8h, v1.8h, #12
; CHECK-NEXT:    shl v2.8h, v2.8h, #12
; CHECK-NEXT:    sshr v4.8h, v1.8h, #12
; CHECK-NEXT:    sshr v1.8h, v2.8h, #12
; CHECK-NEXT:    shadd v1.8h, v0.8h, v1.8h
; CHECK-NEXT:    shadd v0.8h, v3.8h, v4.8h
; CHECK-NEXT:    ret
  %x0 = sext <16 x i8> %a0 to <16 x i16>
  %x1 = sext <16 x i4> %a1 to <16 x i16>
  %and = and <16 x i16> %x0, %x1
  %xor = xor <16 x i16> %x0, %x1
  %shift = ashr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %avg = add <16 x i16> %and, %shift
  ret <16 x i16> %avg
}

; Signed ceil average: folds to srhadd in the narrow type.
define <16 x i16> @sext_avgceils(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: sext_avgceils:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srhadd v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    sshll2 v1.8h, v0.16b, #0
; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-NEXT:    ret
  %x0 = sext <16 x i8> %a0 to <16 x i16>
  %x1 = sext <16 x i8> %a1 to <16 x i16>
  %or = or <16 x i16> %x0, %x1
  %xor = xor <16 x i16> %x0, %x1
  %shift = ashr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %avg = sub <16 x i16> %or, %shift
  ret <16 x i16> %avg
}

; Signed ceil average with mixed widths (i4 vs i8): srhadd forms only at the
; wide (8h) type.
define <16 x i16> @sext_avgceils_mismatch(<16 x i4> %a0, <16 x i8> %a1) {
; CHECK-LABEL: sext_avgceils_mismatch:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushll v2.8h, v0.8b, #0
; CHECK-NEXT:    ushll2 v0.8h, v0.16b, #0
; CHECK-NEXT:    sshll v3.8h, v1.8b, #0
; CHECK-NEXT:    sshll2 v1.8h, v1.16b, #0
; CHECK-NEXT:    shl v2.8h, v2.8h, #12
; CHECK-NEXT:    shl v0.8h, v0.8h, #12
; CHECK-NEXT:    sshr v2.8h, v2.8h, #12
; CHECK-NEXT:    sshr v0.8h, v0.8h, #12
; CHECK-NEXT:    srhadd v1.8h, v0.8h, v1.8h
; CHECK-NEXT:    srhadd v0.8h, v2.8h, v3.8h
; CHECK-NEXT:    ret
  %x0 = sext <16 x i4> %a0 to <16 x i16>
  %x1 = sext <16 x i8> %a1 to <16 x i16>
  %or = or <16 x i16> %x0, %x1
  %xor = xor <16 x i16> %x0, %x1
  %shift = ashr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %avg = sub <16 x i16> %or, %shift
  ret <16 x i16> %avg
}

; (add nuw x, y) >> 1 folds to uhadd — the nuw flag guarantees the sum does
; not wrap, so the halving add is exact.
define <8 x i16> @add_avgflooru(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgflooru:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %add = add nuw <8 x i16> %a0, %a1
  %avg = lshr <8 x i16> %add, splat(i16 1)
  ret <8 x i16> %avg
}

; Without nuw the fold is blocked: plain add + ushr.
define <8 x i16> @add_avgflooru_mismatch(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgflooru_mismatch:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ushr v0.8h, v0.8h, #1
; CHECK-NEXT:    ret
  %add = add <8 x i16> %a0, %a1
  %avg = lshr <8 x i16> %add, splat(i16 1)
  ret <8 x i16> %avg
}

; (x + y + 1) >> 1 with nuw on both adds folds to urhadd (rounding average).
define <8 x i16> @add_avgceilu(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceilu:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %add0 = add nuw <8 x i16> %a0, splat(i16 1)
  %add = add nuw <8 x i16> %a1, %add0
  %avg = lshr <8 x i16> %add, splat(i16 1)
  ret <8 x i16> %avg
}

; Same fold with the +1 applied after the operand sum.
define <8 x i16> @add_avgceilu2(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceilu2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    urhadd v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    ret
  %add0 = add nuw <8 x i16> %a1, %a0
  %add = add nuw <8 x i16> %add0, splat(i16 1)
  %avg = lshr <8 x i16> %add, splat(i16 1)
  ret <8 x i16> %avg
}

; nuw missing on the operand sum: only the (x + 1) >> 1 half can use uhadd.
define <8 x i16> @add_avgceilu_mismatch1(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceilu_mismatch1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.8h, #1
; CHECK-NEXT:    add v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    uhadd v0.8h, v0.8h, v2.8h
; CHECK-NEXT:    ret
  %add0 = add <8 x i16> %a1, %a0
  %add = add nuw <8 x i16> %add0, splat(i16 1)
  %avg = lshr <8 x i16> %add, splat(i16 1)
  ret <8 x i16> %avg
}

; nuw missing on the +1: no urhadd; the +1 is rewritten as sub of ~y.
define <8 x i16> @add_avgceilu_mismatch2(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceilu_mismatch2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvn v1.16b, v1.16b
; CHECK-NEXT:    sub v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ushr v0.8h, v0.8h, #1
; CHECK-NEXT:    ret
  %add0 = add nuw <8 x i16> %a1, %a0
  %add = add <8 x i16> %add0, splat(i16 1)
  %avg = lshr <8 x i16> %add, splat(i16 1)
  ret <8 x i16> %avg
}

; NOTE(review): this test is byte-identical to add_avgceilu_mismatch2 above —
; it was presumably meant to vary the operand order or flag placement;
; confirm the intended variation.
define <8 x i16> @add_avgceilu_mismatch3(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceilu_mismatch3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvn v1.16b, v1.16b
; CHECK-NEXT:    sub v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ushr v0.8h, v0.8h, #1
; CHECK-NEXT:    ret
  %add0 = add nuw <8 x i16> %a1, %a0
  %add = add <8 x i16> %add0, splat(i16 1)
  %avg = lshr <8 x i16> %add, splat(i16 1)
  ret <8 x i16> %avg
}

; Signed variant: (add nsw x, y) ashr 1 folds to shadd.
define <8 x i16> @add_avgfloors(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgfloors:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %add = add nsw <8 x i16> %a0, %a1
  %avg = ashr <8 x i16> %add, splat(i16 1)
  ret <8 x i16> %avg
}

; Without nsw the fold is blocked: plain add + sshr.
define <8 x i16> @add_avgfloors_mismatch(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgfloors_mismatch:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    sshr v0.8h, v0.8h, #1
; CHECK-NEXT:    ret
  %add = add <8 x i16> %a0, %a1
  %avg = ashr <8 x i16> %add, splat(i16 1)
  ret <8 x i16> %avg
}

; Shift amount != 1 blocks the fold even with nsw.
define <8 x i16> @add_avgfloors_mismatch2(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgfloors_mismatch2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    sshr v0.8h, v0.8h, #2
; CHECK-NEXT:    ret
  %add = add nsw <8 x i16> %a0, %a1
  %avg = ashr <8 x i16> %add, splat(i16 2)
  ret <8 x i16> %avg
}

; Signed rounding average: (x + y + 1) ashr 1 with nsw folds to srhadd.
define <8 x i16> @add_avgceils(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceils:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srhadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %add0 = add nsw <8 x i16> %a0, splat(i16 1)
  %add = add nsw <8 x i16> %a1, %add0
  %avg = ashr <8 x i16> %add, splat(i16 1)
  ret <8 x i16> %avg
}

; Same fold with the +1 applied after the operand sum.
define <8 x i16> @add_avgceils2(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceils2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    srhadd v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    ret
  %add0 = add nsw <8 x i16> %a1, %a0
  %add = add nsw <8 x i16> %add0, splat(i16 1)
  %avg = ashr <8 x i16> %add, splat(i16 1)
  ret <8 x i16> %avg
}

; nsw missing on the operand sum: only the (x + 1) ashr 1 half can use shadd.
define <8 x i16> @add_avgceils_mismatch1(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceils_mismatch1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.8h, #1
; CHECK-NEXT:    add v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    shadd v0.8h, v0.8h, v2.8h
; CHECK-NEXT:    ret
  %add0 = add <8 x i16> %a1, %a0
  %add = add nsw <8 x i16> %add0, splat(i16 1)
  %avg = ashr <8 x i16> %add, splat(i16 1)
  ret <8 x i16> %avg
}

; nsw missing on the +1: no srhadd.
define <8 x i16> @add_avgceils_mismatch2(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceils_mismatch2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvn v1.16b, v1.16b
; CHECK-NEXT:    sub v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    sshr v0.8h, v0.8h, #1
; CHECK-NEXT:    ret
  %add0 = add nsw <8 x i16> %a1, %a0
  %add = add <8 x i16> %add0, splat(i16 1)
  %avg = ashr <8 x i16> %add, splat(i16 1)
  ret <8 x i16> %avg
}

; NOTE(review): this test is byte-identical to add_avgceils_mismatch2 above —
; it was presumably meant to vary the operand order or flag placement;
; confirm the intended variation.
define <8 x i16> @add_avgceils_mismatch3(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceils_mismatch3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvn v1.16b, v1.16b
; CHECK-NEXT:    sub v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    sshr v0.8h, v0.8h, #1
; CHECK-NEXT:    ret
  %add0 = add nsw <8 x i16> %a1, %a0
  %add = add <8 x i16> %add0, splat(i16 1)
  %avg = ashr <8 x i16> %add, splat(i16 1)
  ret <8 x i16> %avg
}

; Shift amount != 1 blocks the signed rounding fold.
define <8 x i16> @add_avgceils_mismatch4(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceils_mismatch4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvn v0.16b, v0.16b
; CHECK-NEXT:    sub v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    sshr v0.8h, v0.8h, #2
; CHECK-NEXT:    ret
  %add0 = add nsw <8 x i16> %a0, splat(i16 1)
  %add = add nsw <8 x i16> %a1, %add0
  %avg = ashr <8 x i16> %add, splat(i16 2)
  ret <8 x i16> %avg
}

; Shift amount != 1 blocks the unsigned rounding fold.
define <8 x i16> @add_avgceilu_mismatch(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: add_avgceilu_mismatch:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v2.8h, #1
; CHECK-NEXT:    add v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    add v0.8h, v0.8h, v2.8h
; CHECK-NEXT:    ushr v0.8h, v0.8h, #2
; CHECK-NEXT:    ret
  %add0 = add nuw <8 x i16> %a1, %a0
  %add = add nuw <8 x i16> %add0, splat(i16 1)
  %avg = lshr <8 x i16> %add, splat(i16 2)
  ret <8 x i16> %avg
}