; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512

target triple = "aarch64-unknown-linux-gnu"

;
; FMAXNM
;

; Don't use SVE for 64-bit vectors.
define <4 x half> @fmaxnm_v4f16(<4 x half> %op1, <4 x half> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxnm_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmaxnm v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %res = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %op1, <4 x half> %op2)
  ret <4 x half> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x half> @fmaxnm_v8f16(<8 x half> %op1, <8 x half> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxnm_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmaxnm v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %res = call <8 x half> @llvm.maxnum.v8f16(<8 x half> %op1, <8 x half> %op2)
  ret <8 x half> %res
}

define void @fmaxnm_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxnm_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x half>, ptr %a
  %op2 = load <16 x half>, ptr %b
  %res = call <16 x half> @llvm.maxnum.v16f16(<16 x half> %op1, <16 x half> %op2)
  store <16 x half> %res, ptr %a
  ret void
}

define void @fmaxnm_v32f16(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmaxnm_v32f16:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.h, vl16
; VBITS_EQ_256-NEXT:    mov x8, #16 // =0x10
; VBITS_EQ_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_EQ_256-NEXT:    ld1h { z1.h }, p0/z, [x1, x8, lsl #1]
; VBITS_EQ_256-NEXT:    ld1h { z2.h }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1h { z3.h }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fmaxnm z1.h, p0/m, z1.h, z3.h
; VBITS_EQ_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_EQ_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fmaxnm_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <32 x half>, ptr %a
  %op2 = load <32 x half>, ptr %b
  %res = call <32 x half> @llvm.maxnum.v32f16(<32 x half> %op1, <32 x half> %op2)
  store <32 x half> %res, ptr %a
  ret void
}

define void @fmaxnm_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmaxnm_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x half>, ptr %a
  %op2 = load <64 x half>, ptr %b
  %res = call <64 x half> @llvm.maxnum.v64f16(<64 x half> %op1, <64 x half> %op2)
  store <64 x half> %res, ptr %a
  ret void
}

define void @fmaxnm_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmaxnm_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <128 x half>, ptr %a
  %op2 = load <128 x half>, ptr %b
  %res = call <128 x half> @llvm.maxnum.v128f16(<128 x half> %op1, <128 x half> %op2)
  store <128 x half> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x float> @fmaxnm_v2f32(<2 x float> %op1, <2 x float> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxnm_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmaxnm v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %res = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %op1, <2 x float> %op2)
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @fmaxnm_v4f32(<4 x float> %op1, <4 x float> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxnm_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmaxnm v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %res = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %op1, <4 x float> %op2)
  ret <4 x float> %res
}

define void @fmaxnm_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxnm_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <8 x float>, ptr %a
  %op2 = load <8 x float>, ptr %b
  %res = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %op1, <8 x float> %op2)
  store <8 x float> %res, ptr %a
  ret void
}

define void @fmaxnm_v16f32(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmaxnm_v16f32:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.s, vl8
; VBITS_EQ_256-NEXT:    mov x8, #8 // =0x8
; VBITS_EQ_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_EQ_256-NEXT:    ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
; VBITS_EQ_256-NEXT:    ld1w { z2.s }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1w { z3.s }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fmaxnm z1.s, p0/m, z1.s, z3.s
; VBITS_EQ_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_EQ_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fmaxnm_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x float>, ptr %a
  %op2 = load <16 x float>, ptr %b
  %res = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %op1, <16 x float> %op2)
  store <16 x float> %res, ptr %a
  ret void
}

define void @fmaxnm_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmaxnm_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x float>, ptr %a
  %op2 = load <32 x float>, ptr %b
  %res = call <32 x float> @llvm.maxnum.v32f32(<32 x float> %op1, <32 x float> %op2)
  store <32 x float> %res, ptr %a
  ret void
}

define void @fmaxnm_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmaxnm_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x float>, ptr %a
  %op2 = load <64 x float>, ptr %b
  %res = call <64 x float> @llvm.maxnum.v64f32(<64 x float> %op1, <64 x float> %op2)
  store <64 x float> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x double> @fmaxnm_v1f64(<1 x double> %op1, <1 x double> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxnm_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmaxnm d0, d0, d1
; CHECK-NEXT:    ret
  %res = call <1 x double> @llvm.maxnum.v1f64(<1 x double> %op1, <1 x double> %op2)
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @fmaxnm_v2f64(<2 x double> %op1, <2 x double> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxnm_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmaxnm v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %res = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %op1, <2 x double> %op2)
  ret <2 x double> %res
}

define void @fmaxnm_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmaxnm_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fmaxnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <4 x double>, ptr %a
  %op2 = load <4 x double>, ptr %b
  %res = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %op1, <4 x double> %op2)
  store <4 x double> %res, ptr %a
  ret void
}

define void @fmaxnm_v8f64(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmaxnm_v8f64:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.d, vl4
; VBITS_EQ_256-NEXT:    mov x8, #4 // =0x4
; VBITS_EQ_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_EQ_256-NEXT:    ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
; VBITS_EQ_256-NEXT:    ld1d { z2.d }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1d { z3.d }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fmaxnm z0.d, p0/m, z0.d, z1.d
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fmaxnm z1.d, p0/m, z1.d, z3.d
; VBITS_EQ_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_EQ_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fmaxnm_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fmaxnm z0.d, p0/m, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x double>, ptr %a
  %op2 = load <8 x double>, ptr %b
  %res = call <8 x double> @llvm.maxnum.v8f64(<8 x double> %op1, <8 x double> %op2)
  store <8 x double> %res, ptr %a
  ret void
}

define void @fmaxnm_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmaxnm_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fmaxnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x double>, ptr %a
  %op2 = load <16 x double>, ptr %b
  %res = call <16 x double> @llvm.maxnum.v16f64(<16 x double> %op1, <16 x double> %op2)
  store <16 x double> %res, ptr %a
  ret void
}

define void @fmaxnm_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmaxnm_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fmaxnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x double>, ptr %a
  %op2 = load <32 x double>, ptr %b
  %res = call <32 x double> @llvm.maxnum.v32f64(<32 x double> %op1, <32 x double> %op2)
  store <32 x double> %res, ptr %a
  ret void
}

;
; FMINNM
;

; Don't use SVE for 64-bit vectors.
define <4 x half> @fminnm_v4f16(<4 x half> %op1, <4 x half> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fminnm_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fminnm v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %res = call <4 x half> @llvm.minnum.v4f16(<4 x half> %op1, <4 x half> %op2)
  ret <4 x half> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x half> @fminnm_v8f16(<8 x half> %op1, <8 x half> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fminnm_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fminnm v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %res = call <8 x half> @llvm.minnum.v8f16(<8 x half> %op1, <8 x half> %op2)
  ret <8 x half> %res
}

define void @fminnm_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fminnm_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x half>, ptr %a
  %op2 = load <16 x half>, ptr %b
  %res = call <16 x half> @llvm.minnum.v16f16(<16 x half> %op1, <16 x half> %op2)
  store <16 x half> %res, ptr %a
  ret void
}

define void @fminnm_v32f16(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fminnm_v32f16:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.h, vl16
; VBITS_EQ_256-NEXT:    mov x8, #16 // =0x10
; VBITS_EQ_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_EQ_256-NEXT:    ld1h { z1.h }, p0/z, [x1, x8, lsl #1]
; VBITS_EQ_256-NEXT:    ld1h { z2.h }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1h { z3.h }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fminnm z1.h, p0/m, z1.h, z3.h
; VBITS_EQ_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_EQ_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fminnm_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <32 x half>, ptr %a
  %op2 = load <32 x half>, ptr %b
  %res = call <32 x half> @llvm.minnum.v32f16(<32 x half> %op1, <32 x half> %op2)
  store <32 x half> %res, ptr %a
  ret void
}

define void @fminnm_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fminnm_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x half>, ptr %a
  %op2 = load <64 x half>, ptr %b
  %res = call <64 x half> @llvm.minnum.v64f16(<64 x half> %op1, <64 x half> %op2)
  store <64 x half> %res, ptr %a
  ret void
}

define void @fminnm_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fminnm_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <128 x half>, ptr %a
  %op2 = load <128 x half>, ptr %b
  %res = call <128 x half> @llvm.minnum.v128f16(<128 x half> %op1, <128 x half> %op2)
  store <128 x half> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x float> @fminnm_v2f32(<2 x float> %op1, <2 x float> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fminnm_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fminnm v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %res = call <2 x float> @llvm.minnum.v2f32(<2 x float> %op1, <2 x float> %op2)
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @fminnm_v4f32(<4 x float> %op1, <4 x float> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fminnm_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fminnm v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %res = call <4 x float> @llvm.minnum.v4f32(<4 x float> %op1, <4 x float> %op2)
  ret <4 x float> %res
}

define void @fminnm_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fminnm_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <8 x float>, ptr %a
  %op2 = load <8 x float>, ptr %b
  %res = call <8 x float> @llvm.minnum.v8f32(<8 x float> %op1, <8 x float> %op2)
  store <8 x float> %res, ptr %a
  ret void
}

define void @fminnm_v16f32(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fminnm_v16f32:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.s, vl8
; VBITS_EQ_256-NEXT:    mov x8, #8 // =0x8
; VBITS_EQ_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_EQ_256-NEXT:    ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
; VBITS_EQ_256-NEXT:    ld1w { z2.s }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1w { z3.s }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fminnm z1.s, p0/m, z1.s, z3.s
; VBITS_EQ_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_EQ_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fminnm_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x float>, ptr %a
  %op2 = load <16 x float>, ptr %b
  %res = call <16 x float> @llvm.minnum.v16f32(<16 x float> %op1, <16 x float> %op2)
  store <16 x float> %res, ptr %a
  ret void
}

define void @fminnm_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fminnm_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x float>, ptr %a
  %op2 = load <32 x float>, ptr %b
  %res = call <32 x float> @llvm.minnum.v32f32(<32 x float> %op1, <32 x float> %op2)
  store <32 x float> %res, ptr %a
  ret void
}

define void @fminnm_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fminnm_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x float>, ptr %a
  %op2 = load <64 x float>, ptr %b
  %res = call <64 x float> @llvm.minnum.v64f32(<64 x float> %op1, <64 x float> %op2)
  store <64 x float> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x double> @fminnm_v1f64(<1 x double> %op1, <1 x double> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fminnm_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fminnm d0, d0, d1
; CHECK-NEXT:    ret
  %res = call <1 x double> @llvm.minnum.v1f64(<1 x double> %op1, <1 x double> %op2)
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @fminnm_v2f64(<2 x double> %op1, <2 x double> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fminnm_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fminnm v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %res = call <2 x double> @llvm.minnum.v2f64(<2 x double> %op1, <2 x double> %op2)
  ret <2 x double> %res
}

define void @fminnm_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fminnm_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fminnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <4 x double>, ptr %a
  %op2 = load <4 x double>, ptr %b
  %res = call <4 x double> @llvm.minnum.v4f64(<4 x double> %op1, <4 x double> %op2)
  store <4 x double> %res, ptr %a
  ret void
}

define void @fminnm_v8f64(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fminnm_v8f64:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.d, vl4
; VBITS_EQ_256-NEXT:    mov x8, #4 // =0x4
; VBITS_EQ_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_EQ_256-NEXT:    ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
; VBITS_EQ_256-NEXT:    ld1d { z2.d }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1d { z3.d }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fminnm z0.d, p0/m, z0.d, z1.d
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fminnm z1.d, p0/m, z1.d, z3.d
; VBITS_EQ_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_EQ_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fminnm_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fminnm z0.d, p0/m, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x double>, ptr %a
  %op2 = load <8 x double>, ptr %b
  %res = call <8 x double> @llvm.minnum.v8f64(<8 x double> %op1, <8 x double> %op2)
  store <8 x double> %res, ptr %a
  ret void
}

define void @fminnm_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fminnm_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fminnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x double>, ptr %a
  %op2 = load <16 x double>, ptr %b
  %res = call <16 x double> @llvm.minnum.v16f64(<16 x double> %op1, <16 x double> %op2)
  store <16 x double> %res, ptr %a
  ret void
}

define void @fminnm_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fminnm_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fminnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x double>, ptr %a
  %op2 = load <32 x double>, ptr %b
  %res = call <32 x double> @llvm.minnum.v32f64(<32 x double> %op1, <32 x double> %op2)
  store <32 x double> %res, ptr %a
  ret void
}

;
; FMAX
;

; Don't use SVE for 64-bit vectors.
define <4 x half> @fmax_v4f16(<4 x half> %op1, <4 x half> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmax_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmax v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %res = call <4 x half> @llvm.maximum.v4f16(<4 x half> %op1, <4 x half> %op2)
  ret <4 x half> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x half> @fmax_v8f16(<8 x half> %op1, <8 x half> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmax_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmax v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %res = call <8 x half> @llvm.maximum.v8f16(<8 x half> %op1, <8 x half> %op2)
  ret <8 x half> %res
}

define void @fmax_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmax_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x half>, ptr %a
  %op2 = load <16 x half>, ptr %b
  %res = call <16 x half> @llvm.maximum.v16f16(<16 x half> %op1, <16 x half> %op2)
  store <16 x half> %res, ptr %a
  ret void
}

define void @fmax_v32f16(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmax_v32f16:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.h, vl16
; VBITS_EQ_256-NEXT:    mov x8, #16 // =0x10
; VBITS_EQ_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_EQ_256-NEXT:    ld1h { z1.h }, p0/z, [x1, x8, lsl #1]
; VBITS_EQ_256-NEXT:    ld1h { z2.h }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1h { z3.h }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fmax z0.h, p0/m, z0.h, z1.h
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fmax z1.h, p0/m, z1.h, z3.h
; VBITS_EQ_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_EQ_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fmax_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fmax z0.h, p0/m, z0.h, z1.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <32 x half>, ptr %a
  %op2 = load <32 x half>, ptr %b
  %res = call <32 x half> @llvm.maximum.v32f16(<32 x half> %op1, <32 x half> %op2)
  store <32 x half> %res, ptr %a
  ret void
}

define void @fmax_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmax_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x half>, ptr %a
  %op2 = load <64 x half>, ptr %b
  %res = call <64 x half> @llvm.maximum.v64f16(<64 x half> %op1, <64 x half> %op2)
  store <64 x half> %res, ptr %a
  ret void
}

define void @fmax_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmax_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <128 x half>, ptr %a
  %op2 = load <128 x half>, ptr %b
  %res = call <128 x half> @llvm.maximum.v128f16(<128 x half> %op1, <128 x half> %op2)
  store <128 x half> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x float> @fmax_v2f32(<2 x float> %op1, <2 x float> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmax_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmax v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %res = call <2 x float> @llvm.maximum.v2f32(<2 x float> %op1, <2 x float> %op2)
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @fmax_v4f32(<4 x float> %op1, <4 x float> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmax_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmax v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %res = call <4 x float> @llvm.maximum.v4f32(<4 x float> %op1, <4 x float> %op2)
  ret <4 x float> %res
}

define void @fmax_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmax_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fmax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <8 x float>, ptr %a
  %op2 = load <8 x float>, ptr %b
  %res = call <8 x float> @llvm.maximum.v8f32(<8 x float> %op1, <8 x float> %op2)
  store <8 x float> %res, ptr %a
  ret void
}

define void @fmax_v16f32(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmax_v16f32:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.s, vl8
; VBITS_EQ_256-NEXT:    mov x8, #8 // =0x8
; VBITS_EQ_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_EQ_256-NEXT:    ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
; VBITS_EQ_256-NEXT:    ld1w { z2.s }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1w { z3.s }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fmax z0.s, p0/m, z0.s, z1.s
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fmax z1.s, p0/m, z1.s, z3.s
; VBITS_EQ_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_EQ_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fmax_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fmax z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x float>, ptr %a
  %op2 = load <16 x float>, ptr %b
  %res = call <16 x float> @llvm.maximum.v16f32(<16 x float> %op1, <16 x float> %op2)
  store <16 x float> %res, ptr %a
  ret void
}

define void @fmax_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmax_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fmax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x float>, ptr %a
  %op2 = load <32 x float>, ptr %b
  %res = call <32 x float> @llvm.maximum.v32f32(<32 x float> %op1, <32 x float> %op2)
  store <32 x float> %res, ptr %a
  ret void
}

define void @fmax_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmax_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fmax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x float>, ptr %a
  %op2 = load <64 x float>, ptr %b
  %res = call <64 x float> @llvm.maximum.v64f32(<64 x float> %op1, <64 x float> %op2)
  store <64 x float> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x double> @fmax_v1f64(<1 x double> %op1, <1 x double> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmax_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmax d0, d0, d1
; CHECK-NEXT:    ret
  %res = call <1 x double> @llvm.maximum.v1f64(<1 x double> %op1, <1 x double> %op2)
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @fmax_v2f64(<2 x double> %op1, <2 x double> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmax_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmax v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %res = call <2 x double> @llvm.maximum.v2f64(<2 x double> %op1, <2 x double> %op2)
  ret <2 x double> %res
}

define void @fmax_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmax_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fmax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <4 x double>, ptr %a
  %op2 = load <4 x double>, ptr %b
  %res = call <4 x double> @llvm.maximum.v4f64(<4 x double> %op1, <4 x double> %op2)
  store <4 x double> %res, ptr %a
  ret void
}

define void @fmax_v8f64(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmax_v8f64:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.d, vl4
; VBITS_EQ_256-NEXT:    mov x8, #4 // =0x4
; VBITS_EQ_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_EQ_256-NEXT:    ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
; VBITS_EQ_256-NEXT:    ld1d { z2.d }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1d { z3.d }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fmax z0.d, p0/m, z0.d, z1.d
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fmax z1.d, p0/m, z1.d, z3.d
; VBITS_EQ_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_EQ_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fmax_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fmax z0.d, p0/m, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x double>, ptr %a
  %op2 = load <8 x double>, ptr %b
  %res = call <8 x double> @llvm.maximum.v8f64(<8 x double> %op1, <8 x double> %op2)
  store <8 x double> %res, ptr %a
  ret void
}

define void @fmax_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmax_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fmax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x double>, ptr %a
  %op2 = load <16 x double>, ptr %b
  %res = call <16 x double> @llvm.maximum.v16f64(<16 x double> %op1, <16 x double> %op2)
  store <16 x double> %res, ptr %a
  ret void
}

define void @fmax_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmax_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fmax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x double>, ptr %a
  %op2 = load <32 x double>, ptr %b
  %res = call <32 x double> @llvm.maximum.v32f64(<32 x double> %op1, <32 x double> %op2)
  store <32 x double> %res, ptr %a
  ret void
}

;
; FMIN
;

; Don't use SVE for 64-bit vectors.
define <4 x half> @fmin_v4f16(<4 x half> %op1, <4 x half> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmin_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmin v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %res = call <4 x half> @llvm.minimum.v4f16(<4 x half> %op1, <4 x half> %op2)
  ret <4 x half> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x half> @fmin_v8f16(<8 x half> %op1, <8 x half> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmin_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmin v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %res = call <8 x half> @llvm.minimum.v8f16(<8 x half> %op1, <8 x half> %op2)
  ret <8 x half> %res
}

define void @fmin_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmin_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x half>, ptr %a
  %op2 = load <16 x half>, ptr %b
  %res = call <16 x half> @llvm.minimum.v16f16(<16 x half> %op1, <16 x half> %op2)
  store <16 x half> %res, ptr %a
  ret void
}

define void @fmin_v32f16(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmin_v32f16:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.h, vl16
; VBITS_EQ_256-NEXT:    mov x8, #16 // =0x10
; VBITS_EQ_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_EQ_256-NEXT:    ld1h { z1.h }, p0/z, [x1, x8, lsl #1]
; VBITS_EQ_256-NEXT:    ld1h { z2.h }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1h { z3.h }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fmin z0.h, p0/m, z0.h, z1.h
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fmin z1.h, p0/m, z1.h, z3.h
; VBITS_EQ_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_EQ_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fmin_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fmin z0.h, p0/m, z0.h, z1.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <32 x half>, ptr %a
  %op2 = load <32 x half>, ptr %b
  %res = call <32 x half> @llvm.minimum.v32f16(<32 x half> %op1, <32 x half> %op2)
  store <32 x half> %res, ptr %a
  ret void
}

define void @fmin_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmin_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x half>, ptr %a
  %op2 = load <64 x half>, ptr %b
  %res = call <64 x half> @llvm.minimum.v64f16(<64 x half> %op1, <64 x half> %op2)
  store <64 x half> %res, ptr %a
  ret void
}

define void @fmin_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmin_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <128 x half>, ptr %a
  %op2 = load <128 x half>, ptr %b
  %res = call <128 x half> @llvm.minimum.v128f16(<128 x half> %op1, <128 x half> %op2)
  store <128 x half> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x float> @fmin_v2f32(<2 x float> %op1, <2 x float> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmin_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmin v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %res = call <2 x float> @llvm.minimum.v2f32(<2 x float> %op1, <2 x float> %op2)
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @fmin_v4f32(<4 x float> %op1, <4 x float> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmin_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmin v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %res = call <4 x float> @llvm.minimum.v4f32(<4 x float> %op1, <4 x float> %op2)
  ret <4 x float> %res
}

define void @fmin_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmin_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fmin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <8 x float>, ptr %a
  %op2 = load <8 x float>, ptr %b
  %res = call <8 x float> @llvm.minimum.v8f32(<8 x float> %op1, <8 x float> %op2)
  store <8 x float> %res, ptr %a
  ret void
}

define void @fmin_v16f32(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmin_v16f32:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.s, vl8
; VBITS_EQ_256-NEXT:    mov x8, #8 // =0x8
; VBITS_EQ_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_EQ_256-NEXT:    ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
; VBITS_EQ_256-NEXT:    ld1w { z2.s }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1w { z3.s }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fmin z0.s, p0/m, z0.s, z1.s
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fmin z1.s, p0/m, z1.s, z3.s
; VBITS_EQ_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_EQ_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fmin_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fmin z0.s, p0/m, z0.s, z1.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x float>, ptr %a
  %op2 = load <16 x float>, ptr %b
  %res = call <16 x float> @llvm.minimum.v16f32(<16 x float> %op1, <16 x float> %op2)
  store <16 x float> %res, ptr %a
  ret void
}

define void @fmin_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmin_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fmin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x float>, ptr %a
  %op2 = load <32 x float>, ptr %b
  %res = call <32 x float> @llvm.minimum.v32f32(<32 x float> %op1, <32 x float> %op2)
  store <32 x float> %res, ptr %a
  ret void
}

define void @fmin_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmin_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fmin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x float>, ptr %a
  %op2 = load <64 x float>, ptr %b
  %res = call <64 x float> @llvm.minimum.v64f32(<64 x float> %op1, <64 x float> %op2)
  store <64 x float> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x double> @fmin_v1f64(<1 x double> %op1, <1 x double> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmin_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmin d0, d0, d1
; CHECK-NEXT:    ret
  %res = call <1 x double> @llvm.minimum.v1f64(<1 x double> %op1, <1 x double> %op2)
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @fmin_v2f64(<2 x double> %op1, <2 x double> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fmin_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmin v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %res = call <2 x double> @llvm.minimum.v2f64(<2 x double> %op1, <2 x double> %op2)
  ret <2 x double> %res
}

define void @fmin_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: fmin_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fmin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <4 x double>, ptr %a
  %op2 = load <4 x double>, ptr %b
  %res = call <4 x double> @llvm.minimum.v4f64(<4 x double> %op1, <4 x double> %op2)
  store <4 x double> %res, ptr %a
  ret void
}

define void @fmin_v8f64(ptr %a, ptr %b) #0 {
; VBITS_EQ_256-LABEL: fmin_v8f64:
; VBITS_EQ_256:       // %bb.0:
; VBITS_EQ_256-NEXT:    ptrue p0.d, vl4
; VBITS_EQ_256-NEXT:    mov x8, #4 // =0x4
; VBITS_EQ_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_EQ_256-NEXT:    ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
; VBITS_EQ_256-NEXT:    ld1d { z2.d }, p0/z, [x0]
; VBITS_EQ_256-NEXT:    ld1d { z3.d }, p0/z, [x1]
; VBITS_EQ_256-NEXT:    fmin z0.d, p0/m, z0.d, z1.d
; VBITS_EQ_256-NEXT:    movprfx z1, z2
; VBITS_EQ_256-NEXT:    fmin z1.d, p0/m, z1.d, z3.d
; VBITS_EQ_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_EQ_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_EQ_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fmin_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fmin z0.d, p0/m, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x double>, ptr %a
  %op2 = load <8 x double>, ptr %b
  %res = call <8 x double> @llvm.minimum.v8f64(<8 x double> %op1, <8 x double> %op2)
  store <8 x double> %res, ptr %a
  ret void
}

define void @fmin_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: fmin_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fmin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x double>, ptr %a
  %op2 = load <16 x double>, ptr %b
  %res = call <16 x double> @llvm.minimum.v16f64(<16 x double> %op1, <16 x double> %op2)
  store <16 x double> %res, ptr %a
  ret void
}

define void @fmin_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: fmin_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fmin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x double>, ptr %a
  %op2 = load <32 x double>, ptr %b
  %res = call <32 x double> @llvm.minimum.v32f64(<32 x double> %op1, <32 x double> %op2)
  store <32 x double> %res, ptr %a
  ret void
}

attributes #0 = { "target-features"="+sve" }

declare <4 x half> @llvm.minnum.v4f16(<4 x half>, <4 x half>)
declare <8 x half> @llvm.minnum.v8f16(<8 x half>, <8 x half>)
declare <16 x half> @llvm.minnum.v16f16(<16 x half>, <16 x half>)
declare <32 x half> @llvm.minnum.v32f16(<32 x half>, <32 x half>)
declare <64 x half> @llvm.minnum.v64f16(<64 x half>, <64 x half>)
declare <128 x half> @llvm.minnum.v128f16(<128 x half>, <128 x half>)
declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>)
declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>)
declare <16 x float> @llvm.minnum.v16f32(<16 x float>, <16 x float>)
declare <32 x float> @llvm.minnum.v32f32(<32 x float>, <32 x float>)
declare <64 x float> @llvm.minnum.v64f32(<64 x float>, <64 x float>)
declare <1 x double> @llvm.minnum.v1f64(<1 x double>, <1 x double>)
declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>)
declare <4 x double> @llvm.minnum.v4f64(<4 x double>, <4 x double>)
declare <8 x double> @llvm.minnum.v8f64(<8 x double>, <8 x double>)
declare <16 x double> @llvm.minnum.v16f64(<16 x double>, <16 x double>)
declare <32 x double> @llvm.minnum.v32f64(<32 x double>, <32 x double>)

declare <4 x half> @llvm.maxnum.v4f16(<4 x half>, <4 x half>)
declare <8 x half> @llvm.maxnum.v8f16(<8 x half>, <8 x half>)
declare <16 x half> @llvm.maxnum.v16f16(<16 x half>, <16 x half>)
declare <32 x half> @llvm.maxnum.v32f16(<32 x half>, <32 x half>)
declare <64 x half> @llvm.maxnum.v64f16(<64 x half>, <64 x half>)
declare <128 x half> @llvm.maxnum.v128f16(<128 x half>, <128 x half>)
declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>)
declare <16 x float> @llvm.maxnum.v16f32(<16 x float>, <16 x float>)
declare <32 x float> @llvm.maxnum.v32f32(<32 x float>, <32 x float>)
declare <64 x float> @llvm.maxnum.v64f32(<64 x float>, <64 x float>)
declare <1 x double> @llvm.maxnum.v1f64(<1 x double>, <1 x double>)
declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>)
declare <4 x double> @llvm.maxnum.v4f64(<4 x double>, <4 x double>)
declare <8 x double> @llvm.maxnum.v8f64(<8 x double>, <8 x double>)
declare <16 x double> @llvm.maxnum.v16f64(<16 x double>, <16 x double>)
declare <32 x double> @llvm.maxnum.v32f64(<32 x double>, <32 x double>)

declare <4 x half> @llvm.minimum.v4f16(<4 x half>, <4 x half>)
declare <8 x half> @llvm.minimum.v8f16(<8 x half>, <8 x half>)
declare <16 x half> @llvm.minimum.v16f16(<16 x half>, <16 x half>)
declare <32 x half> @llvm.minimum.v32f16(<32 x half>, <32 x half>)
declare <64 x half> @llvm.minimum.v64f16(<64 x half>, <64 x half>)
declare <128 x half> @llvm.minimum.v128f16(<128 x half>, <128 x half>)
declare <2 x float> @llvm.minimum.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>)
declare <8 x float> @llvm.minimum.v8f32(<8 x float>, <8 x float>)
declare <16 x float> @llvm.minimum.v16f32(<16 x float>, <16 x float>)
declare <32 x float> @llvm.minimum.v32f32(<32 x float>, <32 x float>)
declare <64 x float> @llvm.minimum.v64f32(<64 x float>, <64 x float>)
declare <1 x double> @llvm.minimum.v1f64(<1 x double>, <1 x double>)
declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>)
declare <4 x double> @llvm.minimum.v4f64(<4 x double>, <4 x double>)
declare <8 x double> @llvm.minimum.v8f64(<8 x double>, <8 x double>)
declare <16 x double> @llvm.minimum.v16f64(<16 x double>, <16 x double>)
declare <32 x double> @llvm.minimum.v32f64(<32 x double>, <32 x double>)

declare <4 x half> @llvm.maximum.v4f16(<4 x half>, <4 x half>)
declare <8 x half> @llvm.maximum.v8f16(<8 x half>, <8 x half>)
declare <16 x half> @llvm.maximum.v16f16(<16 x half>, <16 x half>)
declare <32 x half> @llvm.maximum.v32f16(<32 x half>, <32 x half>)
declare <64 x half> @llvm.maximum.v64f16(<64 x half>, <64 x half>)
declare <128 x half> @llvm.maximum.v128f16(<128 x half>, <128 x half>)
declare <2 x float> @llvm.maximum.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>)
declare <8 x float> @llvm.maximum.v8f32(<8 x float>, <8 x float>)
declare <16 x float> @llvm.maximum.v16f32(<16 x float>, <16 x float>)
declare <32 x float> @llvm.maximum.v32f32(<32 x float>, <32 x float>)
declare <64 x float> @llvm.maximum.v64f32(<64 x float>, <64 x float>)
declare <1 x double> @llvm.maximum.v1f64(<1 x double>, <1 x double>)
declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>)
declare <4 x double> @llvm.maximum.v4f64(<4 x double>, <4 x double>)
declare <8 x double> @llvm.maximum.v8f64(<8 x double>, <8 x double>)
declare <16 x double> @llvm.maximum.v16f64(<16 x double>, <16 x double>)
declare <32 x double> @llvm.maximum.v32f64(<32 x double>, <32 x double>)