; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
; RUN: llc < %s -global-isel -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s

; Every function below loads two vectors from %A and %B, passes them to one
; llvm.aarch64.neon.* min/max intrinsic and returns the result. The expected
; assembly is two loads, one vector instruction named after the intrinsic
; (Apple NEON syntax), and a return. Both llc invocations above (the default
; selector and -global-isel) are verified against the same expected output.

; llvm.aarch64.neon.smax.* -> smax

define <8 x i8> @smax_8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smax_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    smax.8b v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @smax_16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smax_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    smax.16b v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = load <16 x i8>, ptr %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @smax_4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smax_4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    smax.4h v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @smax_8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smax_8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    smax.8h v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i16>, ptr %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @smax_2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smax_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    smax.2s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @smax_4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smax_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    smax.4s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i32>, ptr %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

; llvm.aarch64.neon.umax.* -> umax

define <8 x i8> @umax_8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umax_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    umax.8b v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @umax_16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umax_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    umax.16b v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = load <16 x i8>, ptr %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @umax_4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umax_4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    umax.4h v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @umax_8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umax_8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    umax.8h v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i16>, ptr %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @umax_2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umax_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    umax.2s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @umax_4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umax_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    umax.4s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i32>, ptr %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

; llvm.aarch64.neon.smin.* -> smin

define <8 x i8> @smin_8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smin_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    smin.8b v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @smin_16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smin_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    smin.16b v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = load <16 x i8>, ptr %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @smin_4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smin_4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    smin.4h v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @smin_8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smin_8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    smin.8h v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i16>, ptr %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @smin_2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smin_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    smin.2s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @smin_4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smin_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    smin.4s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i32>, ptr %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

; llvm.aarch64.neon.umin.* -> umin

define <8 x i8> @umin_8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umin_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    umin.8b v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @umin_16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umin_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    umin.16b v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = load <16 x i8>, ptr %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @umin_4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umin_4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    umin.4h v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @umin_8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umin_8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    umin.8h v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i16>, ptr %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @umin_2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umin_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    umin.2s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @umin_4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umin_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    umin.4s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i32>, ptr %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

; llvm.aarch64.neon.smaxp.* -> smaxp

define <8 x i8> @smaxp_8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smaxp_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    smaxp.8b v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @smaxp_16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smaxp_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    smaxp.16b v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = load <16 x i8>, ptr %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @smaxp_4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smaxp_4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    smaxp.4h v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @smaxp_8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smaxp_8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    smaxp.8h v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i16>, ptr %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @smaxp_2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smaxp_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    smaxp.2s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @smaxp_4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smaxp_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    smaxp.4s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i32>, ptr %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

; llvm.aarch64.neon.umaxp.* -> umaxp

define <8 x i8> @umaxp_8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umaxp_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    umaxp.8b v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @umaxp_16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umaxp_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    umaxp.16b v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = load <16 x i8>, ptr %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @umaxp_4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umaxp_4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    umaxp.4h v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @umaxp_8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umaxp_8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    umaxp.8h v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i16>, ptr %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @umaxp_2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umaxp_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    umaxp.2s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @umaxp_4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: umaxp_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    umaxp.4s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i32>, ptr %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

; llvm.aarch64.neon.sminp.* -> sminp

define <8 x i8> @sminp_8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sminp_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    sminp.8b v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @sminp_16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sminp_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    sminp.16b v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = load <16 x i8>, ptr %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @sminp_4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sminp_4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    sminp.4h v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @sminp_8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sminp_8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    sminp.8h v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i16>, ptr %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @sminp_2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sminp_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    sminp.2s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @sminp_4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: sminp_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    sminp.4s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i32>, ptr %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

; llvm.aarch64.neon.uminp.* -> uminp

define <8 x i8> @uminp_8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uminp_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    uminp.8b v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @uminp_16b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uminp_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    uminp.16b v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = load <16 x i8>, ptr %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @uminp_4h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uminp_4h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    uminp.4h v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @uminp_8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uminp_8h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    uminp.8h v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i16>, ptr %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @uminp_2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uminp_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    uminp.2s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @uminp_4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: uminp_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    uminp.4s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i32>, ptr %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

; llvm.aarch64.neon.fmax.* -> fmax

define <2 x float> @fmax_2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fmax_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    fmax.2s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <2 x float>, ptr %A
  %tmp2 = load <2 x float>, ptr %B
  %tmp3 = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

define <4 x float> @fmax_4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fmax_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    fmax.4s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x float>, ptr %A
  %tmp2 = load <4 x float>, ptr %B
  %tmp3 = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

define <2 x double> @fmax_2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fmax_2d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    fmax.2d v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <2 x double>, ptr %A
  %tmp2 = load <2 x double>, ptr %B
  %tmp3 = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
  ret <2 x double> %tmp3
}

declare <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double>, <2 x double>) nounwind readnone

; llvm.aarch64.neon.fmaxp.* -> fmaxp

define <2 x float> @fmaxp_2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fmaxp_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    fmaxp.2s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <2 x float>, ptr %A
  %tmp2 = load <2 x float>, ptr %B
  %tmp3 = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

define <4 x float> @fmaxp_4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fmaxp_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    fmaxp.4s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x float>, ptr %A
  %tmp2 = load <4 x float>, ptr %B
  %tmp3 = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

define <2 x double> @fmaxp_2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fmaxp_2d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    fmaxp.2d v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <2 x double>, ptr %A
  %tmp2 = load <2 x double>, ptr %B
  %tmp3 = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
  ret <2 x double> %tmp3
}

declare <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double>, <2 x double>) nounwind readnone

; llvm.aarch64.neon.fmin.* -> fmin

define <2 x float> @fmin_2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fmin_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    fmin.2s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <2 x float>, ptr %A
  %tmp2 = load <2 x float>, ptr %B
  %tmp3 = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

define <4 x float> @fmin_4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fmin_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    fmin.4s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x float>, ptr %A
  %tmp2 = load <4 x float>, ptr %B
  %tmp3 = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

define <2 x double> @fmin_2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fmin_2d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    fmin.2d v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <2 x double>, ptr %A
  %tmp2 = load <2 x double>, ptr %B
  %tmp3 = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
  ret <2 x double> %tmp3
}

declare <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double>, <2 x double>) nounwind readnone

; llvm.aarch64.neon.fminp.* -> fminp

define <2 x float> @fminp_2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fminp_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    fminp.2s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <2 x float>, ptr %A
  %tmp2 = load <2 x float>, ptr %B
  %tmp3 = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

define <4 x float> @fminp_4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fminp_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    fminp.4s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x float>, ptr %A
  %tmp2 = load <4 x float>, ptr %B
  %tmp3 = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

define <2 x double> @fminp_2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fminp_2d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    fminp.2d v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <2 x double>, ptr %A
  %tmp2 = load <2 x double>, ptr %B
  %tmp3 = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
  ret <2 x double> %tmp3
}

declare <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double>, <2 x double>) nounwind readnone

; llvm.aarch64.neon.fminnmp.* -> fminnmp

define <2 x float> @fminnmp_2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fminnmp_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    fminnmp.2s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <2 x float>, ptr %A
  %tmp2 = load <2 x float>, ptr %B
  %tmp3 = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

define <4 x float> @fminnmp_4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fminnmp_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    fminnmp.4s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x float>, ptr %A
  %tmp2 = load <4 x float>, ptr %B
  %tmp3 = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

define <2 x double> @fminnmp_2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fminnmp_2d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    fminnmp.2d v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <2 x double>, ptr %A
  %tmp2 = load <2 x double>, ptr %B
  %tmp3 = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
  ret <2 x double> %tmp3
}

declare <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone

; llvm.aarch64.neon.fmaxnmp.* -> fmaxnmp

define <2 x float> @fmaxnmp_2s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fmaxnmp_2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    fmaxnmp.2s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <2 x float>, ptr %A
  %tmp2 = load <2 x float>, ptr %B
  %tmp3 = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

define <4 x float> @fmaxnmp_4s(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fmaxnmp_4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    fmaxnmp.4s v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <4 x float>, ptr %A
  %tmp2 = load <4 x float>, ptr %B
  %tmp3 = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
  ret <4 x float> %tmp3
}

define <2 x double> @fmaxnmp_2d(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: fmaxnmp_2d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    fmaxnmp.2d v0, v0, v1
; CHECK-NEXT:    ret
  %tmp1 = load <2 x double>, ptr %A
  %tmp2 = load <2 x double>, ptr %B
  %tmp3 = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
  ret <2 x double> %tmp3
}

declare <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float>, <4 x float>) nounwind readnone
declare <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double>, <2 x double>) nounwind readnone