; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64 -mattr=-bf16 | FileCheck %s --check-prefixes=CHECK,CHECK-CVT
; RUN: llc < %s -mtriple=aarch64 -mattr=+bf16 | FileCheck %s --check-prefixes=CHECK,CHECK-BF16
;
; The test is compiled twice: once with the bf16 feature disabled (assertions
; tagged CVT) and once with it enabled (assertions tagged BF16). Assertions
; carrying neither tag are shared by both runs.

; Element-wise fadd on <8 x bfloat>. Both runs widen the two halves to .4s
; with shll/shll2 and add in single precision. The +bf16 run narrows with
; bfcvtn/bfcvtn2; the -bf16 run expects an integer/bitwise sequence ending in
; uzp2 instead.
define <8 x bfloat> @add_h(<8 x bfloat> %a, <8 x bfloat> %b) {
; CHECK-CVT-LABEL: add_h:
; CHECK-CVT:       // %bb.0: // %entry
; CHECK-CVT-NEXT:    shll2 v3.4s, v1.8h, #16
; CHECK-CVT-NEXT:    shll2 v4.4s, v0.8h, #16
; CHECK-CVT-NEXT:    movi v2.4s, #1
; CHECK-CVT-NEXT:    ushr v5.4s, v0.4s, #16
; CHECK-CVT-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-CVT-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-CVT-NEXT:    fadd v3.4s, v4.4s, v3.4s
; CHECK-CVT-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECK-CVT-NEXT:    and v2.16b, v5.16b, v2.16b
; CHECK-CVT-NEXT:    movi v1.4s, #127, msl #8
; CHECK-CVT-NEXT:    fcmeq v5.4s, v3.4s, v3.4s
; CHECK-CVT-NEXT:    add v4.4s, v3.4s, v2.4s
; CHECK-CVT-NEXT:    orr v3.4s, #64, lsl #16
; CHECK-CVT-NEXT:    add v2.4s, v0.4s, v2.4s
; CHECK-CVT-NEXT:    fcmeq v6.4s, v0.4s, v0.4s
; CHECK-CVT-NEXT:    orr v0.4s, #64, lsl #16
; CHECK-CVT-NEXT:    add v4.4s, v4.4s, v1.4s
; CHECK-CVT-NEXT:    add v1.4s, v2.4s, v1.4s
; CHECK-CVT-NEXT:    mov v2.16b, v5.16b
; CHECK-CVT-NEXT:    bsl v2.16b, v4.16b, v3.16b
; CHECK-CVT-NEXT:    bit v0.16b, v1.16b, v6.16b
; CHECK-CVT-NEXT:    uzp2 v0.8h, v0.8h, v2.8h
; CHECK-CVT-NEXT:    ret
;
; CHECK-BF16-LABEL: add_h:
; CHECK-BF16:       // %bb.0: // %entry
; CHECK-BF16-NEXT:    shll v2.4s, v1.4h, #16
; CHECK-BF16-NEXT:    shll v3.4s, v0.4h, #16
; CHECK-BF16-NEXT:    shll2 v1.4s, v1.8h, #16
; CHECK-BF16-NEXT:    shll2 v0.4s, v0.8h, #16
; CHECK-BF16-NEXT:    fadd v2.4s, v3.4s, v2.4s
; CHECK-BF16-NEXT:    fadd v1.4s, v0.4s, v1.4s
; CHECK-BF16-NEXT:    bfcvtn v0.4h, v2.4s
; CHECK-BF16-NEXT:    bfcvtn2 v0.8h, v1.4s
; CHECK-BF16-NEXT:    ret
entry:
  %0 = fadd <8 x bfloat> %a, %b
  ret <8 x bfloat> %0
}


; Element-wise fsub on <8 x bfloat>; same expected shape as the fadd case,
; with fsub on the widened .4s halves.
define <8 x bfloat> @sub_h(<8 x bfloat> %a, <8 x bfloat> %b) {
; CHECK-CVT-LABEL: sub_h:
; CHECK-CVT:       // %bb.0: // %entry
; CHECK-CVT-NEXT:    shll2 v3.4s, v1.8h, #16
; CHECK-CVT-NEXT:    shll2 v4.4s, v0.8h, #16
; CHECK-CVT-NEXT:    movi v2.4s, #1
; CHECK-CVT-NEXT:    ushr v5.4s, v0.4s, #16
; CHECK-CVT-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-CVT-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-CVT-NEXT:    fsub v3.4s, v4.4s, v3.4s
; CHECK-CVT-NEXT:    fsub v0.4s, v0.4s, v1.4s
; CHECK-CVT-NEXT:    and v2.16b, v5.16b, v2.16b
; CHECK-CVT-NEXT:    movi v1.4s, #127, msl #8
; CHECK-CVT-NEXT:    fcmeq v5.4s, v3.4s, v3.4s
; CHECK-CVT-NEXT:    add v4.4s, v3.4s, v2.4s
; CHECK-CVT-NEXT:    orr v3.4s, #64, lsl #16
; CHECK-CVT-NEXT:    add v2.4s, v0.4s, v2.4s
; CHECK-CVT-NEXT:    fcmeq v6.4s, v0.4s, v0.4s
; CHECK-CVT-NEXT:    orr v0.4s, #64, lsl #16
; CHECK-CVT-NEXT:    add v4.4s, v4.4s, v1.4s
; CHECK-CVT-NEXT:    add v1.4s, v2.4s, v1.4s
; CHECK-CVT-NEXT:    mov v2.16b, v5.16b
; CHECK-CVT-NEXT:    bsl v2.16b, v4.16b, v3.16b
; CHECK-CVT-NEXT:    bit v0.16b, v1.16b, v6.16b
; CHECK-CVT-NEXT:    uzp2 v0.8h, v0.8h, v2.8h
; CHECK-CVT-NEXT:    ret
;
; CHECK-BF16-LABEL: sub_h:
; CHECK-BF16:       // %bb.0: // %entry
; CHECK-BF16-NEXT:    shll v2.4s, v1.4h, #16
; CHECK-BF16-NEXT:    shll v3.4s, v0.4h, #16
; CHECK-BF16-NEXT:    shll2 v1.4s, v1.8h, #16
; CHECK-BF16-NEXT:    shll2 v0.4s, v0.8h, #16
; CHECK-BF16-NEXT:    fsub v2.4s, v3.4s, v2.4s
; CHECK-BF16-NEXT:    fsub v1.4s, v0.4s, v1.4s
; CHECK-BF16-NEXT:    bfcvtn v0.4h, v2.4s
; CHECK-BF16-NEXT:    bfcvtn2 v0.8h, v1.4s
; CHECK-BF16-NEXT:    ret
entry:
  %0 = fsub <8 x bfloat> %a, %b
  ret <8 x bfloat> %0
}


; Element-wise fmul on <8 x bfloat>; same expected shape as the fadd case,
; with fmul on the widened .4s halves.
define <8 x bfloat> @mul_h(<8 x bfloat> %a, <8 x bfloat> %b) {
; CHECK-CVT-LABEL: mul_h:
; CHECK-CVT:       // %bb.0: // %entry
; CHECK-CVT-NEXT:    shll2 v3.4s, v1.8h, #16
; CHECK-CVT-NEXT:    shll2 v4.4s, v0.8h, #16
; CHECK-CVT-NEXT:    movi v2.4s, #1
; CHECK-CVT-NEXT:    ushr v5.4s, v0.4s, #16
; CHECK-CVT-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-CVT-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-CVT-NEXT:    fmul v3.4s, v4.4s, v3.4s
; CHECK-CVT-NEXT:    fmul v0.4s, v0.4s, v1.4s
; CHECK-CVT-NEXT:    and v2.16b, v5.16b, v2.16b
; CHECK-CVT-NEXT:    movi v1.4s, #127, msl #8
; CHECK-CVT-NEXT:    fcmeq v5.4s, v3.4s, v3.4s
; CHECK-CVT-NEXT:    add v4.4s, v3.4s, v2.4s
; CHECK-CVT-NEXT:    orr v3.4s, #64, lsl #16
; CHECK-CVT-NEXT:    add v2.4s, v0.4s, v2.4s
; CHECK-CVT-NEXT:    fcmeq v6.4s, v0.4s, v0.4s
; CHECK-CVT-NEXT:    orr v0.4s, #64, lsl #16
; CHECK-CVT-NEXT:    add v4.4s, v4.4s, v1.4s
; CHECK-CVT-NEXT:    add v1.4s, v2.4s, v1.4s
; CHECK-CVT-NEXT:    mov v2.16b, v5.16b
; CHECK-CVT-NEXT:    bsl v2.16b, v4.16b, v3.16b
; CHECK-CVT-NEXT:    bit v0.16b, v1.16b, v6.16b
; CHECK-CVT-NEXT:    uzp2 v0.8h, v0.8h, v2.8h
; CHECK-CVT-NEXT:    ret
;
; CHECK-BF16-LABEL: mul_h:
; CHECK-BF16:       // %bb.0: // %entry
; CHECK-BF16-NEXT:    shll v2.4s, v1.4h, #16
; CHECK-BF16-NEXT:    shll v3.4s, v0.4h, #16
; CHECK-BF16-NEXT:    shll2 v1.4s, v1.8h, #16
; CHECK-BF16-NEXT:    shll2 v0.4s, v0.8h, #16
; CHECK-BF16-NEXT:    fmul v2.4s, v3.4s, v2.4s
; CHECK-BF16-NEXT:    fmul v1.4s, v0.4s, v1.4s
; CHECK-BF16-NEXT:    bfcvtn v0.4h, v2.4s
; CHECK-BF16-NEXT:    bfcvtn2 v0.8h, v1.4s
; CHECK-BF16-NEXT:    ret
entry:
  %0 = fmul <8 x bfloat> %a, %b
  ret <8 x bfloat> %0
}


; Element-wise fdiv on <8 x bfloat>. Both runs divide the widened .4s halves
; with fdiv; the +bf16 run narrows with bfcvtn/bfcvtn2, the -bf16 run expects
; an integer/bitwise sequence (using bit/bif here) ending in uzp2.
define <8 x bfloat> @div_h(<8 x bfloat> %a, <8 x bfloat> %b) {
; CHECK-CVT-LABEL: div_h:
; CHECK-CVT:       // %bb.0: // %entry
; CHECK-CVT-NEXT:    shll2 v2.4s, v1.8h, #16
; CHECK-CVT-NEXT:    shll2 v3.4s, v0.8h, #16
; CHECK-CVT-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-CVT-NEXT:    movi v4.4s, #127, msl #8
; CHECK-CVT-NEXT:    fdiv v2.4s, v3.4s, v2.4s
; CHECK-CVT-NEXT:    shll v3.4s, v0.4h, #16
; CHECK-CVT-NEXT:    ushr v0.4s, v0.4s, #16
; CHECK-CVT-NEXT:    fdiv v1.4s, v3.4s, v1.4s
; CHECK-CVT-NEXT:    movi v3.4s, #1
; CHECK-CVT-NEXT:    and v0.16b, v0.16b, v3.16b
; CHECK-CVT-NEXT:    add v0.4s, v0.4s, v4.4s
; CHECK-CVT-NEXT:    fcmeq v4.4s, v2.4s, v2.4s
; CHECK-CVT-NEXT:    add v3.4s, v2.4s, v0.4s
; CHECK-CVT-NEXT:    orr v2.4s, #64, lsl #16
; CHECK-CVT-NEXT:    fcmeq v5.4s, v1.4s, v1.4s
; CHECK-CVT-NEXT:    add v0.4s, v1.4s, v0.4s
; CHECK-CVT-NEXT:    orr v1.4s, #64, lsl #16
; CHECK-CVT-NEXT:    bit v2.16b, v3.16b, v4.16b
; CHECK-CVT-NEXT:    bif v0.16b, v1.16b, v5.16b
; CHECK-CVT-NEXT:    uzp2 v0.8h, v0.8h, v2.8h
; CHECK-CVT-NEXT:    ret
;
; CHECK-BF16-LABEL: div_h:
; CHECK-BF16:       // %bb.0: // %entry
; CHECK-BF16-NEXT:    shll v2.4s, v1.4h, #16
; CHECK-BF16-NEXT:    shll v3.4s, v0.4h, #16
; CHECK-BF16-NEXT:    shll2 v1.4s, v1.8h, #16
; CHECK-BF16-NEXT:    shll2 v0.4s, v0.8h, #16
; CHECK-BF16-NEXT:    fdiv v2.4s, v3.4s, v2.4s
; CHECK-BF16-NEXT:    fdiv v1.4s, v0.4s, v1.4s
; CHECK-BF16-NEXT:    bfcvtn v0.4h, v2.4s
; CHECK-BF16-NEXT:    bfcvtn2 v0.8h, v1.4s
; CHECK-BF16-NEXT:    ret
entry:
  %0 = fdiv <8 x bfloat> %a, %b
  ret <8 x bfloat> %0
}


; Loading <8 x bfloat> is expected to be a single ldr of q0 in both runs.
define <8 x bfloat> @load_h(ptr %a) {
; CHECK-LABEL: load_h:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ret
entry:
  %0 = load <8 x bfloat>, ptr %a, align 4
  ret <8 x bfloat> %0
}


; Storing <8 x bfloat> is expected to be a single str of q0 in both runs.
define void @store_h(ptr %a, <8 x bfloat> %b) {
; CHECK-LABEL: store_h:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
entry:
  store <8 x bfloat> %b, ptr %a, align 4
  ret void
}

; fptrunc <8 x float> -> <8 x bfloat>. The +bf16 run expects only
; bfcvtn/bfcvtn2; the -bf16 run expects a movi/ushr/and/add/fcmeq/orr/bit
; sequence followed by uzp2.
define <8 x bfloat> @s_to_h(<8 x float> %a) {
; CHECK-CVT-LABEL: s_to_h:
; CHECK-CVT:       // %bb.0:
; CHECK-CVT-NEXT:    movi v2.4s, #1
; CHECK-CVT-NEXT:    movi v3.4s, #127, msl #8
; CHECK-CVT-NEXT:    ushr v4.4s, v1.4s, #16
; CHECK-CVT-NEXT:    ushr v5.4s, v0.4s, #16
; CHECK-CVT-NEXT:    and v4.16b, v4.16b, v2.16b
; CHECK-CVT-NEXT:    add v6.4s, v1.4s, v3.4s
; CHECK-CVT-NEXT:    and v2.16b, v5.16b, v2.16b
; CHECK-CVT-NEXT:    add v3.4s, v0.4s, v3.4s
; CHECK-CVT-NEXT:    fcmeq v5.4s, v1.4s, v1.4s
; CHECK-CVT-NEXT:    orr v1.4s, #64, lsl #16
; CHECK-CVT-NEXT:    add v4.4s, v4.4s, v6.4s
; CHECK-CVT-NEXT:    fcmeq v6.4s, v0.4s, v0.4s
; CHECK-CVT-NEXT:    orr v0.4s, #64, lsl #16
; CHECK-CVT-NEXT:    add v2.4s, v2.4s, v3.4s
; CHECK-CVT-NEXT:    bit v1.16b, v4.16b, v5.16b
; CHECK-CVT-NEXT:    bit v0.16b, v2.16b, v6.16b
; CHECK-CVT-NEXT:    uzp2 v0.8h, v0.8h, v1.8h
; CHECK-CVT-NEXT:    ret
;
; CHECK-BF16-LABEL: s_to_h:
; CHECK-BF16:       // %bb.0:
; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
; CHECK-BF16-NEXT:    bfcvtn2 v0.8h, v1.4s
; CHECK-BF16-NEXT:    ret
  %1 = fptrunc <8 x float> %a to <8 x bfloat>
  ret <8 x bfloat> %1
}

; fptrunc <8 x double> -> <8 x bfloat>. Both runs first narrow the .2d inputs
; to .4s with fcvtxn/fcvtxn2, then convert the .4s values to bfloat
; (bfcvtn/bfcvtn2 with +bf16, the integer/bitwise sequence without it).
define <8 x bfloat> @d_to_h(<8 x double> %a) {
; CHECK-CVT-LABEL: d_to_h:
; CHECK-CVT:       // %bb.0:
; CHECK-CVT-NEXT:    fcvtxn v2.2s, v2.2d
; CHECK-CVT-NEXT:    fcvtxn v0.2s, v0.2d
; CHECK-CVT-NEXT:    fcvtxn2 v2.4s, v3.2d
; CHECK-CVT-NEXT:    fcvtxn2 v0.4s, v1.2d
; CHECK-CVT-NEXT:    movi v1.4s, #1
; CHECK-CVT-NEXT:    movi v3.4s, #127, msl #8
; CHECK-CVT-NEXT:    ushr v4.4s, v2.4s, #16
; CHECK-CVT-NEXT:    ushr v5.4s, v0.4s, #16
; CHECK-CVT-NEXT:    add v6.4s, v2.4s, v3.4s
; CHECK-CVT-NEXT:    add v3.4s, v0.4s, v3.4s
; CHECK-CVT-NEXT:    and v4.16b, v4.16b, v1.16b
; CHECK-CVT-NEXT:    and v1.16b, v5.16b, v1.16b
; CHECK-CVT-NEXT:    fcmeq v5.4s, v2.4s, v2.4s
; CHECK-CVT-NEXT:    orr v2.4s, #64, lsl #16
; CHECK-CVT-NEXT:    add v4.4s, v4.4s, v6.4s
; CHECK-CVT-NEXT:    fcmeq v6.4s, v0.4s, v0.4s
; CHECK-CVT-NEXT:    add v1.4s, v1.4s, v3.4s
; CHECK-CVT-NEXT:    orr v0.4s, #64, lsl #16
; CHECK-CVT-NEXT:    bit v2.16b, v4.16b, v5.16b
; CHECK-CVT-NEXT:    bit v0.16b, v1.16b, v6.16b
; CHECK-CVT-NEXT:    uzp2 v0.8h, v0.8h, v2.8h
; CHECK-CVT-NEXT:    ret
;
; CHECK-BF16-LABEL: d_to_h:
; CHECK-BF16:       // %bb.0:
; CHECK-BF16-NEXT:    fcvtxn v0.2s, v0.2d
; CHECK-BF16-NEXT:    fcvtxn v2.2s, v2.2d
; CHECK-BF16-NEXT:    fcvtxn2 v0.4s, v1.2d
; CHECK-BF16-NEXT:    fcvtxn2 v2.4s, v3.2d
; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
; CHECK-BF16-NEXT:    bfcvtn2 v0.8h, v2.4s
; CHECK-BF16-NEXT:    ret
  %1 = fptrunc <8 x double> %a to <8 x bfloat>
  ret <8 x bfloat> %1
}

; fpext <8 x bfloat> -> <8 x float> is expected to be just shll/shll2 by #16
; in both runs.
define <8 x float> @h_to_s(<8 x bfloat> %a) {
; CHECK-LABEL: h_to_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shll2 v1.4s, v0.8h, #16
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    ret
  %1 = fpext <8 x bfloat> %a to <8 x float>
  ret <8 x float> %1
}

; fpext <8 x bfloat> -> <8 x double>: shll/shll2 to .4s, then fcvtl/fcvtl2 to
; four .2d registers, in both runs.
define <8 x double> @h_to_d(<8 x bfloat> %a) {
; CHECK-LABEL: h_to_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shll v1.4s, v0.4h, #16
; CHECK-NEXT:    shll2 v2.4s, v0.8h, #16
; CHECK-NEXT:    fcvtl v0.2d, v1.2s
; CHECK-NEXT:    fcvtl2 v3.2d, v2.4s
; CHECK-NEXT:    fcvtl2 v1.2d, v1.4s
; CHECK-NEXT:    fcvtl v2.2d, v2.2s
; CHECK-NEXT:    ret
  %1 = fpext <8 x bfloat> %a to <8 x double>
  ret <8 x double> %1
}


; Bitcast <8 x i16> -> <8 x bfloat>. %a follows an unnamed float parameter;
; the expected code is a single register move from v1 to v0.
define <8 x bfloat> @bitcast_i_to_h(float, <8 x i16> %a) {
; CHECK-LABEL: bitcast_i_to_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %2 = bitcast <8 x i16> %a to <8 x bfloat>
  ret <8 x bfloat> %2
}

; Bitcast <8 x bfloat> -> <8 x i16>. %a follows an unnamed float parameter;
; the expected code is a single register move from v1 to v0.
define <8 x i16> @bitcast_h_to_i(float, <8 x bfloat> %a) {
; CHECK-LABEL: bitcast_h_to_i:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %2 = bitcast <8 x bfloat> %a to <8 x i16>
  ret <8 x i16> %2
}

; sitofp <4 x i8> -> <4 x bfloat>. Both runs expect shl/sshr by #8, sshll to
; .4s and scvtf; the +bf16 run then uses bfcvtn, the -bf16 run a
; ushr/and/add sequence finished by addhn.
define <4 x bfloat> @sitofp_v4i8(<4 x i8> %a) #0 {
; CHECK-CVT-LABEL: sitofp_v4i8:
; CHECK-CVT:       // %bb.0:
; CHECK-CVT-NEXT:    shl v0.4h, v0.4h, #8
; CHECK-CVT-NEXT:    movi v1.4s, #1
; CHECK-CVT-NEXT:    sshr v0.4h, v0.4h, #8
; CHECK-CVT-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-CVT-NEXT:    scvtf v0.4s, v0.4s
; CHECK-CVT-NEXT:    ushr v2.4s, v0.4s, #16
; CHECK-CVT-NEXT:    and v1.16b, v2.16b, v1.16b
; CHECK-CVT-NEXT:    movi v2.4s, #127, msl #8
; CHECK-CVT-NEXT:    add v0.4s, v1.4s, v0.4s
; CHECK-CVT-NEXT:    addhn v0.4h, v0.4s, v2.4s
; CHECK-CVT-NEXT:    ret
;
; CHECK-BF16-LABEL: sitofp_v4i8:
; CHECK-BF16:       // %bb.0:
; CHECK-BF16-NEXT:    shl v0.4h, v0.4h, #8
; CHECK-BF16-NEXT:    sshr v0.4h, v0.4h, #8
; CHECK-BF16-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-BF16-NEXT:    scvtf v0.4s, v0.4s
; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
; CHECK-BF16-NEXT:    ret
  %1 = sitofp <4 x i8> %a to <4 x bfloat>
  ret <4 x bfloat> %1
}

; sitofp <8 x i8> -> <8 x bfloat>. Both runs sign-extend with sshll/sshll2 to
; two .4s halves and convert with scvtf; narrowing is bfcvtn/bfcvtn2 with
; +bf16 and addhn/addhn2 after a ushr/and/add sequence without it.
define <8 x bfloat> @sitofp_v8i8(<8 x i8> %a) #0 {
; CHECK-CVT-LABEL: sitofp_v8i8:
; CHECK-CVT:       // %bb.0:
; CHECK-CVT-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-CVT-NEXT:    movi v1.4s, #1
; CHECK-CVT-NEXT:    movi v4.4s, #127, msl #8
; CHECK-CVT-NEXT:    sshll v2.4s, v0.4h, #0
; CHECK-CVT-NEXT:    sshll2 v0.4s, v0.8h, #0
; CHECK-CVT-NEXT:    scvtf v2.4s, v2.4s
; CHECK-CVT-NEXT:    scvtf v3.4s, v0.4s
; CHECK-CVT-NEXT:    ushr v0.4s, v2.4s, #16
; CHECK-CVT-NEXT:    ushr v5.4s, v3.4s, #16
; CHECK-CVT-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-CVT-NEXT:    and v1.16b, v5.16b, v1.16b
; CHECK-CVT-NEXT:    add v0.4s, v0.4s, v4.4s
; CHECK-CVT-NEXT:    add v1.4s, v1.4s, v4.4s
; CHECK-CVT-NEXT:    addhn v0.4h, v2.4s, v0.4s
; CHECK-CVT-NEXT:    addhn2 v0.8h, v3.4s, v1.4s
; CHECK-CVT-NEXT:    ret
;
; CHECK-BF16-LABEL: sitofp_v8i8:
; CHECK-BF16:       // %bb.0:
; CHECK-BF16-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-BF16-NEXT:    sshll v1.4s, v0.4h, #0
; CHECK-BF16-NEXT:    sshll2 v2.4s, v0.8h, #0
; CHECK-BF16-NEXT:    scvtf v1.4s, v1.4s
; CHECK-BF16-NEXT:    bfcvtn v0.4h, v1.4s
; CHECK-BF16-NEXT:    scvtf v1.4s, v2.4s
; CHECK-BF16-NEXT:    bfcvtn2 v0.8h, v1.4s
; CHECK-BF16-NEXT:    ret
  %1 = sitofp <8 x i8> %a to <8 x bfloat>
  ret <8 x bfloat> %1
}

; sitofp <16 x i8> -> <16 x bfloat>. Same expected pattern as the <8 x i8>
; case, applied to four .4s quarters and producing results in v0 and v1.
define <16 x bfloat> @sitofp_v16i8(<16 x i8> %a) #0 {
; CHECK-CVT-LABEL: sitofp_v16i8:
; CHECK-CVT:       // %bb.0:
; CHECK-CVT-NEXT:    sshll2 v2.8h, v0.16b, #0
; CHECK-CVT-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-CVT-NEXT:    movi v1.4s, #1
; CHECK-CVT-NEXT:    movi v7.4s, #127, msl #8
; CHECK-CVT-NEXT:    sshll v3.4s, v2.4h, #0
; CHECK-CVT-NEXT:    sshll v4.4s, v0.4h, #0
; CHECK-CVT-NEXT:    sshll2 v2.4s, v2.8h, #0
; CHECK-CVT-NEXT:    sshll2 v0.4s, v0.8h, #0
; CHECK-CVT-NEXT:    scvtf v3.4s, v3.4s
; CHECK-CVT-NEXT:    scvtf v4.4s, v4.4s
; CHECK-CVT-NEXT:    scvtf v2.4s, v2.4s
; CHECK-CVT-NEXT:    scvtf v6.4s, v0.4s
; CHECK-CVT-NEXT:    ushr v5.4s, v3.4s, #16
; CHECK-CVT-NEXT:    ushr v0.4s, v4.4s, #16
; CHECK-CVT-NEXT:    ushr v16.4s, v2.4s, #16
; CHECK-CVT-NEXT:    ushr v17.4s, v6.4s, #16
; CHECK-CVT-NEXT:    and v5.16b, v5.16b, v1.16b
; CHECK-CVT-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-CVT-NEXT:    and v16.16b, v16.16b, v1.16b
; CHECK-CVT-NEXT:    and v17.16b, v17.16b, v1.16b
; CHECK-CVT-NEXT:    add v5.4s, v5.4s, v7.4s
; CHECK-CVT-NEXT:    add v0.4s, v0.4s, v7.4s
; CHECK-CVT-NEXT:    addhn v1.4h, v3.4s, v5.4s
; CHECK-CVT-NEXT:    addhn v0.4h, v4.4s, v0.4s
; CHECK-CVT-NEXT:    add v3.4s, v16.4s, v7.4s
; CHECK-CVT-NEXT:    add v4.4s, v17.4s, v7.4s
; CHECK-CVT-NEXT:    addhn2 v1.8h, v2.4s, v3.4s
; CHECK-CVT-NEXT:    addhn2 v0.8h, v6.4s, v4.4s
; CHECK-CVT-NEXT:    ret
;
; CHECK-BF16-LABEL: sitofp_v16i8:
; CHECK-BF16:       // %bb.0:
; CHECK-BF16-NEXT:    sshll2 v1.8h, v0.16b, #0
; CHECK-BF16-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-BF16-NEXT:    sshll v2.4s, v1.4h, #0
; CHECK-BF16-NEXT:    sshll v3.4s, v0.4h, #0
; CHECK-BF16-NEXT:    sshll2 v4.4s, v1.8h, #0
; CHECK-BF16-NEXT:    sshll2 v5.4s, v0.8h, #0
; CHECK-BF16-NEXT:    scvtf v2.4s, v2.4s
; CHECK-BF16-NEXT:    scvtf v3.4s, v3.4s
; CHECK-BF16-NEXT:    bfcvtn v1.4h, v2.4s
; CHECK-BF16-NEXT:    scvtf v2.4s, v4.4s
; CHECK-BF16-NEXT:    bfcvtn v0.4h, v3.4s
; CHECK-BF16-NEXT:    scvtf v3.4s, v5.4s
; CHECK-BF16-NEXT:    bfcvtn2 v1.8h, v2.4s
; CHECK-BF16-NEXT:    bfcvtn2 v0.8h, v3.4s
; CHECK-BF16-NEXT:    ret
  %1 = sitofp <16 x i8> %a to <16 x bfloat>
  ret <16 x bfloat> %1
}

; sitofp <8 x i16> -> <8 x bfloat>: sshll/sshll2 to .4s, scvtf, then
; bfcvtn/bfcvtn2 (+bf16) or ushr/and/add followed by addhn/addhn2 (-bf16).
define <8 x bfloat> @sitofp_i16(<8 x i16> %a) #0 {
; CHECK-CVT-LABEL: sitofp_i16:
; CHECK-CVT:       // %bb.0:
; CHECK-CVT-NEXT:    sshll v2.4s, v0.4h, #0
; CHECK-CVT-NEXT:    sshll2 v0.4s, v0.8h, #0
; CHECK-CVT-NEXT:    movi v1.4s, #1
; CHECK-CVT-NEXT:    movi v4.4s, #127, msl #8
; CHECK-CVT-NEXT:    scvtf v2.4s, v2.4s
; CHECK-CVT-NEXT:    scvtf v3.4s, v0.4s
; CHECK-CVT-NEXT:    ushr v0.4s, v2.4s, #16
; CHECK-CVT-NEXT:    ushr v5.4s, v3.4s, #16
; CHECK-CVT-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-CVT-NEXT:    and v1.16b, v5.16b, v1.16b
; CHECK-CVT-NEXT:    add v0.4s, v0.4s, v4.4s
; CHECK-CVT-NEXT:    add v1.4s, v1.4s, v4.4s
; CHECK-CVT-NEXT:    addhn v0.4h, v2.4s, v0.4s
; CHECK-CVT-NEXT:    addhn2 v0.8h, v3.4s, v1.4s
; CHECK-CVT-NEXT:    ret
;
; CHECK-BF16-LABEL: sitofp_i16:
; CHECK-BF16:       // %bb.0:
; CHECK-BF16-NEXT:    sshll v1.4s, v0.4h, #0
; CHECK-BF16-NEXT:    sshll2 v2.4s, v0.8h, #0
; CHECK-BF16-NEXT:    scvtf v1.4s, v1.4s
; CHECK-BF16-NEXT:    bfcvtn v0.4h, v1.4s
; CHECK-BF16-NEXT:    scvtf v1.4s, v2.4s
; CHECK-BF16-NEXT:    bfcvtn2 v0.8h, v1.4s
; CHECK-BF16-NEXT:    ret
  %1 = sitofp <8 x i16> %a to <8 x bfloat>
  ret <8 x bfloat> %1
}

; sitofp <8 x i32> -> <8 x bfloat>: scvtf directly on the two .4s inputs, then
; bfcvtn/bfcvtn2 (+bf16) or ushr/and/add followed by addhn/addhn2 (-bf16).
define <8 x bfloat> @sitofp_i32(<8 x i32> %a) #0 {
; CHECK-CVT-LABEL: sitofp_i32:
; CHECK-CVT:       // %bb.0:
; CHECK-CVT-NEXT:    scvtf v0.4s, v0.4s
; CHECK-CVT-NEXT:    movi v2.4s, #1
; CHECK-CVT-NEXT:    scvtf v1.4s, v1.4s
; CHECK-CVT-NEXT:    movi v5.4s, #127, msl #8
; CHECK-CVT-NEXT:    ushr v3.4s, v0.4s, #16
; CHECK-CVT-NEXT:    ushr v4.4s, v1.4s, #16
; CHECK-CVT-NEXT:    and v3.16b, v3.16b, v2.16b
; CHECK-CVT-NEXT:    and v2.16b, v4.16b, v2.16b
; CHECK-CVT-NEXT:    add v0.4s, v3.4s, v0.4s
; CHECK-CVT-NEXT:    add v1.4s, v2.4s, v1.4s
; CHECK-CVT-NEXT:    addhn v0.4h, v0.4s, v5.4s
; CHECK-CVT-NEXT:    addhn2 v0.8h, v1.4s, v5.4s
; CHECK-CVT-NEXT:    ret
;
; CHECK-BF16-LABEL: sitofp_i32:
; CHECK-BF16:       // %bb.0:
; CHECK-BF16-NEXT:    scvtf v0.4s, v0.4s
; CHECK-BF16-NEXT:    scvtf v1.4s, v1.4s
; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
; CHECK-BF16-NEXT:    bfcvtn2 v0.8h, v1.4s
; CHECK-BF16-NEXT:    ret
  %1 = sitofp <8 x i32> %a to <8 x bfloat>
  ret <8 x bfloat> %1
}


; sitofp <8 x i64> -> <8 x bfloat>: scvtf on four .2d inputs, fcvtn/fcvtn2
; down to two .4s values, then bfcvtn/bfcvtn2 (+bf16) or the
; fcmeq/orr/bit-based sequence ending in uzp2 (-bf16).
define <8 x bfloat> @sitofp_i64(<8 x i64> %a) #0 {
; CHECK-CVT-LABEL: sitofp_i64:
; CHECK-CVT:       // %bb.0:
; CHECK-CVT-NEXT:    scvtf v2.2d, v2.2d
; CHECK-CVT-NEXT:    scvtf v0.2d, v0.2d
; CHECK-CVT-NEXT:    scvtf v3.2d, v3.2d
; CHECK-CVT-NEXT:    scvtf v1.2d, v1.2d
; CHECK-CVT-NEXT:    fcvtn v2.2s, v2.2d
; CHECK-CVT-NEXT:    fcvtn v0.2s, v0.2d
; CHECK-CVT-NEXT:    fcvtn2 v2.4s, v3.2d
; CHECK-CVT-NEXT:    fcvtn2 v0.4s, v1.2d
; CHECK-CVT-NEXT:    movi v1.4s, #1
; CHECK-CVT-NEXT:    movi v3.4s, #127, msl #8
; CHECK-CVT-NEXT:    ushr v4.4s, v2.4s, #16
; CHECK-CVT-NEXT:    ushr v5.4s, v0.4s, #16
; CHECK-CVT-NEXT:    add v6.4s, v2.4s, v3.4s
; CHECK-CVT-NEXT:    add v3.4s, v0.4s, v3.4s
; CHECK-CVT-NEXT:    and v4.16b, v4.16b, v1.16b
; CHECK-CVT-NEXT:    and v1.16b, v5.16b, v1.16b
; CHECK-CVT-NEXT:    fcmeq v5.4s, v2.4s, v2.4s
; CHECK-CVT-NEXT:    orr v2.4s, #64, lsl #16
; CHECK-CVT-NEXT:    add v4.4s, v4.4s, v6.4s
; CHECK-CVT-NEXT:    fcmeq v6.4s, v0.4s, v0.4s
; CHECK-CVT-NEXT:    add v1.4s, v1.4s, v3.4s
; CHECK-CVT-NEXT:    orr v0.4s, #64, lsl #16
; CHECK-CVT-NEXT:    bit v2.16b, v4.16b, v5.16b
; CHECK-CVT-NEXT:    bit v0.16b, v1.16b, v6.16b
; CHECK-CVT-NEXT:    uzp2 v0.8h, v0.8h, v2.8h
; CHECK-CVT-NEXT:    ret
;
; CHECK-BF16-LABEL: sitofp_i64:
; CHECK-BF16:       // %bb.0:
; CHECK-BF16-NEXT:    scvtf v0.2d, v0.2d
; CHECK-BF16-NEXT:    scvtf v2.2d, v2.2d
; CHECK-BF16-NEXT:    scvtf v1.2d, v1.2d
; CHECK-BF16-NEXT:    scvtf v3.2d, v3.2d
; CHECK-BF16-NEXT:    fcvtn v0.2s, v0.2d
; CHECK-BF16-NEXT:    fcvtn v2.2s, v2.2d
; CHECK-BF16-NEXT:    fcvtn2 v0.4s, v1.2d
; CHECK-BF16-NEXT:    fcvtn2 v2.4s, v3.2d
; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
; CHECK-BF16-NEXT:    bfcvtn2 v0.8h, v2.4s
; CHECK-BF16-NEXT:    ret
  %1 = sitofp <8 x i64> %a to <8 x bfloat>
  ret <8 x bfloat> %1
}

; uitofp <4 x i8> -> <4 x bfloat>. Both runs expect bic to clear the upper
; byte of each .4h lane, ushll to .4s and ucvtf; the +bf16 run then uses
; bfcvtn, the -bf16 run a ushr/and/add sequence finished by addhn.
define <4 x bfloat> @uitofp_v4i8(<4 x i8> %a) #0 {
; CHECK-CVT-LABEL: uitofp_v4i8:
; CHECK-CVT:       // %bb.0:
; CHECK-CVT-NEXT:    bic v0.4h, #255, lsl #8
; CHECK-CVT-NEXT:    movi v1.4s, #1
; CHECK-CVT-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-CVT-NEXT:    ucvtf v0.4s, v0.4s
; CHECK-CVT-NEXT:    ushr v2.4s, v0.4s, #16
; CHECK-CVT-NEXT:    and v1.16b, v2.16b, v1.16b
; CHECK-CVT-NEXT:    movi v2.4s, #127, msl #8
; CHECK-CVT-NEXT:    add v0.4s, v1.4s, v0.4s
; CHECK-CVT-NEXT:    addhn v0.4h, v0.4s, v2.4s
; CHECK-CVT-NEXT:    ret
;
; CHECK-BF16-LABEL: uitofp_v4i8:
; CHECK-BF16:       // %bb.0:
; CHECK-BF16-NEXT:    bic v0.4h, #255, lsl #8
; CHECK-BF16-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-BF16-NEXT:    ucvtf v0.4s, v0.4s
; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
; CHECK-BF16-NEXT:    ret
  %1 = uitofp <4 x i8> %a to <4 x bfloat>
  ret <4 x bfloat> %1
}

; uitofp <8 x i8> -> <8 x bfloat>: zero-extend with ushll/ushll2, ucvtf, then
; bfcvtn/bfcvtn2 (+bf16) or ushr/and/add followed by addhn/addhn2 (-bf16).
define <8 x bfloat> @uitofp_v8i8(<8 x i8> %a) #0 {
; CHECK-CVT-LABEL: uitofp_v8i8:
; CHECK-CVT:       // %bb.0:
; CHECK-CVT-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-CVT-NEXT:    movi v1.4s, #1
; CHECK-CVT-NEXT:    movi v4.4s, #127, msl #8
; CHECK-CVT-NEXT:    ushll v2.4s, v0.4h, #0
; CHECK-CVT-NEXT:    ushll2 v0.4s, v0.8h, #0
; CHECK-CVT-NEXT:    ucvtf v2.4s, v2.4s
; CHECK-CVT-NEXT:    ucvtf v3.4s, v0.4s
; CHECK-CVT-NEXT:    ushr v0.4s, v2.4s, #16
; CHECK-CVT-NEXT:    ushr v5.4s, v3.4s, #16
; CHECK-CVT-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-CVT-NEXT:    and v1.16b, v5.16b, v1.16b
; CHECK-CVT-NEXT:    add v0.4s, v0.4s, v4.4s
; CHECK-CVT-NEXT:    add v1.4s, v1.4s, v4.4s
; CHECK-CVT-NEXT:    addhn v0.4h, v2.4s, v0.4s
; CHECK-CVT-NEXT:    addhn2 v0.8h, v3.4s, v1.4s
; CHECK-CVT-NEXT:    ret
;
; CHECK-BF16-LABEL: uitofp_v8i8:
; CHECK-BF16:       // %bb.0:
; CHECK-BF16-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-BF16-NEXT:    ushll v1.4s, v0.4h, #0
; CHECK-BF16-NEXT:    ushll2 v2.4s, v0.8h, #0
; CHECK-BF16-NEXT:    ucvtf v1.4s, v1.4s
; CHECK-BF16-NEXT:    bfcvtn v0.4h, v1.4s
; CHECK-BF16-NEXT:    ucvtf v1.4s, v2.4s
; CHECK-BF16-NEXT:    bfcvtn2 v0.8h, v1.4s
; CHECK-BF16-NEXT:    ret
  %1 = uitofp <8 x i8> %a to <8 x bfloat>
  ret <8 x bfloat> %1
}

; uitofp <16 x i8> -> <16 x bfloat>. Same expected pattern as the <8 x i8>
; case, applied to four .4s quarters and producing results in v0 and v1.
define <16 x bfloat> @uitofp_v16i8(<16 x i8> %a) #0 {
; CHECK-CVT-LABEL: uitofp_v16i8:
; CHECK-CVT:       // %bb.0:
; CHECK-CVT-NEXT:    ushll2 v2.8h, v0.16b, #0
; CHECK-CVT-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-CVT-NEXT:    movi v1.4s, #1
; CHECK-CVT-NEXT:    movi v7.4s, #127, msl #8
; CHECK-CVT-NEXT:    ushll v3.4s, v2.4h, #0
; CHECK-CVT-NEXT:    ushll v4.4s, v0.4h, #0
; CHECK-CVT-NEXT:    ushll2 v2.4s, v2.8h, #0
; CHECK-CVT-NEXT:    ushll2 v0.4s, v0.8h, #0
; CHECK-CVT-NEXT:    ucvtf v3.4s, v3.4s
; CHECK-CVT-NEXT:    ucvtf v4.4s, v4.4s
; CHECK-CVT-NEXT:    ucvtf v2.4s, v2.4s
; CHECK-CVT-NEXT:    ucvtf v6.4s, v0.4s
; CHECK-CVT-NEXT:    ushr v5.4s, v3.4s, #16
; CHECK-CVT-NEXT:    ushr v0.4s, v4.4s, #16
; CHECK-CVT-NEXT:    ushr v16.4s, v2.4s, #16
; CHECK-CVT-NEXT:    ushr v17.4s, v6.4s, #16
; CHECK-CVT-NEXT:    and v5.16b, v5.16b, v1.16b
; CHECK-CVT-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-CVT-NEXT:    and v16.16b, v16.16b, v1.16b
; CHECK-CVT-NEXT:    and v17.16b, v17.16b, v1.16b
; CHECK-CVT-NEXT:    add v5.4s, v5.4s, v7.4s
; CHECK-CVT-NEXT:    add v0.4s, v0.4s, v7.4s
; CHECK-CVT-NEXT:    addhn v1.4h, v3.4s, v5.4s
; CHECK-CVT-NEXT:    addhn v0.4h, v4.4s, v0.4s
; CHECK-CVT-NEXT:    add v3.4s, v16.4s, v7.4s
; CHECK-CVT-NEXT:    add v4.4s, v17.4s, v7.4s
; CHECK-CVT-NEXT:    addhn2 v1.8h, v2.4s, v3.4s
; CHECK-CVT-NEXT:    addhn2 v0.8h, v6.4s, v4.4s
; CHECK-CVT-NEXT:    ret
;
; CHECK-BF16-LABEL: uitofp_v16i8:
; CHECK-BF16:       // %bb.0:
; CHECK-BF16-NEXT:    ushll2 v1.8h, v0.16b, #0
; CHECK-BF16-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-BF16-NEXT:    ushll v2.4s, v1.4h, #0
; CHECK-BF16-NEXT:    ushll v3.4s, v0.4h, #0
; CHECK-BF16-NEXT:    ushll2 v4.4s, v1.8h, #0
; CHECK-BF16-NEXT:    ushll2 v5.4s, v0.8h, #0
; CHECK-BF16-NEXT:    ucvtf v2.4s, v2.4s
; CHECK-BF16-NEXT:    ucvtf v3.4s, v3.4s
; CHECK-BF16-NEXT:    bfcvtn v1.4h, v2.4s
; CHECK-BF16-NEXT:    ucvtf v2.4s, v4.4s
; CHECK-BF16-NEXT:    bfcvtn v0.4h, v3.4s
; CHECK-BF16-NEXT:    ucvtf v3.4s, v5.4s
; CHECK-BF16-NEXT:    bfcvtn2 v1.8h, v2.4s
; CHECK-BF16-NEXT:    bfcvtn2 v0.8h, v3.4s
; CHECK-BF16-NEXT:    ret
  %1 = uitofp <16 x i8> %a to <16 x bfloat>
  ret <16 x bfloat> %1
}


; uitofp <8 x i16> -> <8 x bfloat>: ushll/ushll2 to .4s, ucvtf, then
; bfcvtn/bfcvtn2 (+bf16) or ushr/and/add followed by addhn/addhn2 (-bf16).
define <8 x bfloat> @uitofp_i16(<8 x i16> %a) #0 {
; CHECK-CVT-LABEL: uitofp_i16:
; CHECK-CVT:       // %bb.0:
; CHECK-CVT-NEXT:    ushll v2.4s, v0.4h, #0
; CHECK-CVT-NEXT:    ushll2 v0.4s, v0.8h, #0
; CHECK-CVT-NEXT:    movi v1.4s, #1
; CHECK-CVT-NEXT:    movi v4.4s, #127, msl #8
; CHECK-CVT-NEXT:    ucvtf v2.4s, v2.4s
; CHECK-CVT-NEXT:    ucvtf v3.4s, v0.4s
; CHECK-CVT-NEXT:    ushr v0.4s, v2.4s, #16
; CHECK-CVT-NEXT:    ushr v5.4s, v3.4s, #16
; CHECK-CVT-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-CVT-NEXT:    and v1.16b, v5.16b, v1.16b
; CHECK-CVT-NEXT:    add v0.4s, v0.4s, v4.4s
; CHECK-CVT-NEXT:    add v1.4s, v1.4s, v4.4s
; CHECK-CVT-NEXT:    addhn v0.4h, v2.4s, v0.4s
; CHECK-CVT-NEXT:    addhn2 v0.8h, v3.4s, v1.4s
; CHECK-CVT-NEXT:    ret
;
; CHECK-BF16-LABEL: uitofp_i16:
; CHECK-BF16:       // %bb.0:
; CHECK-BF16-NEXT:    ushll v1.4s, v0.4h, #0
; CHECK-BF16-NEXT:    ushll2 v2.4s, v0.8h, #0
; CHECK-BF16-NEXT:    ucvtf v1.4s, v1.4s
; CHECK-BF16-NEXT:    bfcvtn v0.4h, v1.4s
; CHECK-BF16-NEXT:    ucvtf v1.4s, v2.4s
; CHECK-BF16-NEXT:    bfcvtn2 v0.8h, v1.4s
; CHECK-BF16-NEXT:    ret
  %1 = uitofp <8 x i16> %a to <8 x bfloat>
  ret <8 x bfloat> %1
}


; uitofp <8 x i32> -> <8 x bfloat>: ucvtf directly on the two .4s inputs, then
; bfcvtn/bfcvtn2 (+bf16) or ushr/and/add followed by addhn/addhn2 (-bf16).
define <8 x bfloat> @uitofp_i32(<8 x i32> %a) #0 {
; CHECK-CVT-LABEL: uitofp_i32:
; CHECK-CVT:       // %bb.0:
; CHECK-CVT-NEXT:    ucvtf v0.4s, v0.4s
; CHECK-CVT-NEXT:    movi v2.4s, #1
; CHECK-CVT-NEXT:    ucvtf v1.4s, v1.4s
; CHECK-CVT-NEXT:    movi v5.4s, #127, msl #8
; CHECK-CVT-NEXT:    ushr v3.4s, v0.4s, #16
; CHECK-CVT-NEXT:    ushr v4.4s, v1.4s, #16
; CHECK-CVT-NEXT:    and v3.16b, v3.16b, v2.16b
; CHECK-CVT-NEXT:    and v2.16b, v4.16b, v2.16b
; CHECK-CVT-NEXT:    add v0.4s, v3.4s, v0.4s
; CHECK-CVT-NEXT:    add v1.4s, v2.4s, v1.4s
; CHECK-CVT-NEXT:    addhn v0.4h, v0.4s, v5.4s
; CHECK-CVT-NEXT:    addhn2 v0.8h, v1.4s, v5.4s
; CHECK-CVT-NEXT:    ret
;
; CHECK-BF16-LABEL: uitofp_i32:
; CHECK-BF16:       // %bb.0:
; CHECK-BF16-NEXT:    ucvtf v0.4s, v0.4s
; CHECK-BF16-NEXT:    ucvtf v1.4s, v1.4s
; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
; CHECK-BF16-NEXT:    bfcvtn2 v0.8h, v1.4s
; CHECK-BF16-NEXT:    ret
  %1 = uitofp <8 x i32> %a to <8 x bfloat>
  ret <8 x bfloat> %1
}


; uitofp <8 x i64> -> <8 x bfloat>: ucvtf on four .2d inputs, fcvtn/fcvtn2
; down to two .4s values, then bfcvtn/bfcvtn2 (+bf16) or the
; fcmeq/orr/bit-based sequence ending in uzp2 (-bf16).
define <8 x bfloat> @uitofp_i64(<8 x i64> %a) #0 {
; CHECK-CVT-LABEL: uitofp_i64:
; CHECK-CVT:       // %bb.0:
; CHECK-CVT-NEXT:    ucvtf v2.2d, v2.2d
; CHECK-CVT-NEXT:    ucvtf v0.2d, v0.2d
; CHECK-CVT-NEXT:    ucvtf v3.2d, v3.2d
; CHECK-CVT-NEXT:    ucvtf v1.2d, v1.2d
; CHECK-CVT-NEXT:    fcvtn v2.2s, v2.2d
; CHECK-CVT-NEXT:    fcvtn v0.2s, v0.2d
; CHECK-CVT-NEXT:    fcvtn2 v2.4s, v3.2d
; CHECK-CVT-NEXT:    fcvtn2 v0.4s, v1.2d
; CHECK-CVT-NEXT:    movi v1.4s, #1
; CHECK-CVT-NEXT:    movi v3.4s, #127, msl #8
; CHECK-CVT-NEXT:    ushr v4.4s, v2.4s, #16
; CHECK-CVT-NEXT:    ushr v5.4s, v0.4s, #16
; CHECK-CVT-NEXT:    add v6.4s, v2.4s, v3.4s
; CHECK-CVT-NEXT:    add v3.4s, v0.4s, v3.4s
; CHECK-CVT-NEXT:    and v4.16b, v4.16b, v1.16b
; CHECK-CVT-NEXT:    and v1.16b, v5.16b, v1.16b
; CHECK-CVT-NEXT:    fcmeq v5.4s, v2.4s, v2.4s
; CHECK-CVT-NEXT:    orr v2.4s, #64, lsl #16
; CHECK-CVT-NEXT:    add v4.4s, v4.4s, v6.4s
; CHECK-CVT-NEXT:    fcmeq v6.4s, v0.4s, v0.4s
; CHECK-CVT-NEXT:    add v1.4s, v1.4s, v3.4s
; CHECK-CVT-NEXT:    orr v0.4s, #64, lsl #16
; CHECK-CVT-NEXT:    bit v2.16b, v4.16b, v5.16b
; CHECK-CVT-NEXT:    bit v0.16b, v1.16b, v6.16b
; CHECK-CVT-NEXT:    uzp2 v0.8h, v0.8h, v2.8h
; CHECK-CVT-NEXT:    ret
;
; CHECK-BF16-LABEL: uitofp_i64:
; CHECK-BF16:       // %bb.0:
; CHECK-BF16-NEXT:    ucvtf v0.2d, v0.2d
; CHECK-BF16-NEXT:    ucvtf v2.2d, v2.2d
; CHECK-BF16-NEXT:    ucvtf v1.2d, v1.2d
; CHECK-BF16-NEXT:    ucvtf v3.2d, v3.2d
; CHECK-BF16-NEXT:    fcvtn v0.2s, v0.2d
; CHECK-BF16-NEXT:    fcvtn v2.2s, v2.2d
; CHECK-BF16-NEXT:    fcvtn2 v0.4s, v1.2d
; CHECK-BF16-NEXT:    fcvtn2 v2.4s, v3.2d
; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
; CHECK-BF16-NEXT:    bfcvtn2 v0.8h, v2.4s
; CHECK-BF16-NEXT:    ret
  %1 = uitofp <8 x i64> %a to <8 x bfloat>
  ret <8 x bfloat> %1
}

; Inserting a scalar bfloat into lane 0 of an undef vector and storing the
; vector is expected to reduce to a single str of q0 in both runs.
define void @test_insert_at_zero(bfloat %a, ptr %b) #0 {
; CHECK-LABEL: test_insert_at_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %1 = insertelement <8 x bfloat> undef, bfloat %a, i64 0
  store <8 x bfloat> %1, ptr %b, align 4
  ret void
}

; fptosi <8 x bfloat> -> <8 x i8>: widen with shll/shll2, fcvtzs on .4s, then
; narrow with uzp1 and xtn. Same output expected from both runs.
define <8 x i8> @fptosi_i8(<8 x bfloat> %a) #0 {
; CHECK-LABEL: fptosi_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shll2 v1.4s, v0.8h, #16
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    fcvtzs v1.4s, v1.4s
; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    xtn v0.8b, v0.8h
; CHECK-NEXT:    ret
  %1 = fptosi<8 x bfloat> %a to <8 x i8>
  ret <8 x i8> %1
}

; fptosi <8 x bfloat> -> <8 x i16>: widen with shll/shll2, fcvtzs on .4s, then
; narrow with uzp1. Same output expected from both runs.
define <8 x i16> @fptosi_i16(<8 x bfloat> %a) #0 {
; CHECK-LABEL: fptosi_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shll2 v1.4s, v0.8h, #16
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    fcvtzs v1.4s, v1.4s
; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %1 = fptosi<8 x bfloat> %a to <8 x i16>
  ret <8 x i16> %1
}

; fptoui <8 x bfloat> -> <8 x i8>: widen with shll/shll2, fcvtzu on .4s, then
; narrow with uzp1 and xtn. Same output expected from both runs.
define <8 x i8> @fptoui_i8(<8 x bfloat> %a) #0 {
; CHECK-LABEL: fptoui_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shll2 v1.4s, v0.8h, #16
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    fcvtzu v1.4s, v1.4s
; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    xtn v0.8b, v0.8h
; CHECK-NEXT:    ret
  %1 = fptoui<8 x bfloat> %a to <8 x i8>
  ret <8 x i8> %1
}

; fptoui <8 x bfloat> -> <8 x i16>: widen with shll/shll2, fcvtzu on .4s, then
; narrow with uzp1. Same output expected from both runs.
define <8 x i16> @fptoui_i16(<8 x bfloat> %a) #0 {
; CHECK-LABEL: fptoui_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shll2 v1.4s, v0.8h, #16
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    fcvtzu v1.4s, v1.4s
; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %1 = fptoui<8 x bfloat> %a to <8 x i16>
  ret <8 x i16> %1
}

; fcmp une on <8 x bfloat>. The expected code is scalarized: lanes are
; extracted with dup, widened with shll, compared with a scalar fcmp, and each
; csetm (condition ne) result is inserted into v2 before the final xtn.
define <8 x i1> @test_fcmp_une(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-LABEL: test_fcmp_une:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v2.4h, v1.h[1]
; CHECK-NEXT:    dup v3.4h, v0.h[1]
; CHECK-NEXT:    dup v4.4h, v1.h[2]
; CHECK-NEXT:    dup v5.4h, v0.h[2]
; CHECK-NEXT:    dup v6.4h, v0.h[3]
; CHECK-NEXT:    shll v2.4s, v2.4h, #16
; CHECK-NEXT:    shll v3.4s, v3.4h, #16
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v2.4s, v1.4h, #16
; CHECK-NEXT:    shll v3.4s, v0.4h, #16
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v3.4s, v4.4h, #16
; CHECK-NEXT:    shll v4.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.4h, v1.h[3]
; CHECK-NEXT:    csetm w9, ne
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[4]
; CHECK-NEXT:    dup v6.8h, v0.h[4]
; CHECK-NEXT:    mov v2.h[1], w8
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[5]
; CHECK-NEXT:    dup v6.8h, v0.h[5]
; CHECK-NEXT:    mov v2.h[2], w8
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[6]
; CHECK-NEXT:    dup v6.8h, v0.h[6]
; CHECK-NEXT:    dup v1.8h, v1.h[7]
; CHECK-NEXT:    dup v0.8h, v0.h[7]
; CHECK-NEXT:    mov v2.h[3], w8
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    mov v2.h[4], w8
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    mov v2.h[5], w8
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    fcmp s0, s1
; CHECK-NEXT:    mov v2.h[6], w8
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    mov v2.h[7], w8
; CHECK-NEXT:    xtn v0.8b, v2.8h
; CHECK-NEXT:    ret
  %1 = fcmp une <8 x bfloat> %a, %b
  ret <8 x i1> %1
}

; fcmp ueq on <8 x bfloat>. Scalarized like the une case, but every lane's
; result is formed by csetm (condition eq) followed by csinv with condition vc.
define <8 x i1> @test_fcmp_ueq(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-LABEL: test_fcmp_ueq:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v2.4h, v1.h[1]
; CHECK-NEXT:    dup v3.4h, v0.h[1]
; CHECK-NEXT:    dup v4.4h, v1.h[2]
; CHECK-NEXT:    dup v5.4h, v0.h[2]
; CHECK-NEXT:    dup v6.4h, v0.h[3]
; CHECK-NEXT:    shll v2.4s, v2.4h, #16
; CHECK-NEXT:    shll v3.4s, v3.4h, #16
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v2.4s, v1.4h, #16
; CHECK-NEXT:    shll v3.4s, v0.4h, #16
; CHECK-NEXT:    csetm w8, eq
; CHECK-NEXT:    csinv w8, w8, wzr, vc
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v3.4s, v4.4h, #16
; CHECK-NEXT:    shll v4.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.4h, v1.h[3]
; CHECK-NEXT:    csetm w9, eq
; CHECK-NEXT:    csinv w9, w9, wzr, vc
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[4]
; CHECK-NEXT:    dup v6.8h, v0.h[4]
; CHECK-NEXT:    mov v2.h[1], w8
; CHECK-NEXT:    csetm w8, eq
; CHECK-NEXT:    csinv w8, w8, wzr, vc
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[5]
; CHECK-NEXT:    dup v6.8h, v0.h[5]
; CHECK-NEXT:    mov v2.h[2], w8
; CHECK-NEXT:    csetm w8, eq
; CHECK-NEXT:    csinv w8, w8, wzr, vc
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[6]
; CHECK-NEXT:    dup v6.8h, v0.h[6]
; CHECK-NEXT:    dup v1.8h, v1.h[7]
; CHECK-NEXT:    dup v0.8h, v0.h[7]
; CHECK-NEXT:    mov v2.h[3], w8
; CHECK-NEXT:    csetm w8, eq
; CHECK-NEXT:    csinv w8, w8, wzr, vc
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    mov v2.h[4], w8
; CHECK-NEXT:    csetm w8, eq
; CHECK-NEXT:    csinv w8, w8, wzr, vc
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    mov v2.h[5], w8
; CHECK-NEXT:    csetm w8, eq
; CHECK-NEXT:    csinv w8, w8, wzr, vc
; CHECK-NEXT:    fcmp s0, s1
; CHECK-NEXT:    mov v2.h[6], w8
; CHECK-NEXT:    csetm w8, eq
; CHECK-NEXT:    csinv w8, w8, wzr, vc
; CHECK-NEXT:    mov v2.h[7], w8
; CHECK-NEXT:    xtn v0.8b, v2.8h
; CHECK-NEXT:    ret
  %1 = fcmp ueq <8 x bfloat> %a, %b
  ret <8 x i1> %1
}

; fcmp ugt on <8 x bfloat>. Scalarized like the une case, with csetm using
; condition hi for every lane.
define <8 x i1> @test_fcmp_ugt(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-LABEL: test_fcmp_ugt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v2.4h, v1.h[1]
; CHECK-NEXT:    dup v3.4h, v0.h[1]
; CHECK-NEXT:    dup v4.4h, v1.h[2]
; CHECK-NEXT:    dup v5.4h, v0.h[2]
; CHECK-NEXT:    dup v6.4h, v0.h[3]
; CHECK-NEXT:    shll v2.4s, v2.4h, #16
; CHECK-NEXT:    shll v3.4s, v3.4h, #16
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v2.4s, v1.4h, #16
; CHECK-NEXT:    shll v3.4s, v0.4h, #16
; CHECK-NEXT:    csetm w8, hi
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v3.4s, v4.4h, #16
; CHECK-NEXT:    shll v4.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.4h, v1.h[3]
; CHECK-NEXT:    csetm w9, hi
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[4]
; CHECK-NEXT:    dup v6.8h, v0.h[4]
; CHECK-NEXT:    mov v2.h[1], w8
; CHECK-NEXT:    csetm w8, hi
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[5]
; CHECK-NEXT:    dup v6.8h, v0.h[5]
; CHECK-NEXT:    mov v2.h[2], w8
; CHECK-NEXT:    csetm w8, hi
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[6]
; CHECK-NEXT:    dup v6.8h, v0.h[6]
; CHECK-NEXT:    dup v1.8h, v1.h[7]
; CHECK-NEXT:    dup v0.8h, v0.h[7]
; CHECK-NEXT:    mov v2.h[3], w8
; CHECK-NEXT:    csetm w8, hi
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    mov v2.h[4], w8
; CHECK-NEXT:    csetm w8, hi
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    mov v2.h[5], w8
; CHECK-NEXT:    csetm w8, hi
; CHECK-NEXT:    fcmp s0, s1
; CHECK-NEXT:    mov v2.h[6], w8
; CHECK-NEXT:    csetm w8, hi
; CHECK-NEXT:    mov v2.h[7], w8
; CHECK-NEXT:    xtn v0.8b, v2.8h
; CHECK-NEXT:    ret
  %1 = fcmp ugt <8 x bfloat> %a, %b
  ret <8 x i1> %1
}

define <8 x i1> @test_fcmp_uge(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-LABEL: test_fcmp_uge:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v2.4h, v1.h[1]
; CHECK-NEXT:    dup v3.4h, v0.h[1]
; CHECK-NEXT:    dup v4.4h, v1.h[2]
; CHECK-NEXT:    dup v5.4h, v0.h[2]
; CHECK-NEXT:    dup v6.4h, v0.h[3]
; CHECK-NEXT:    shll v2.4s, v2.4h, #16
; CHECK-NEXT:    shll v3.4s, v3.4h, #16
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v2.4s, v1.4h, #16
; CHECK-NEXT:    shll v3.4s, v0.4h, #16
; CHECK-NEXT:    csetm w8, pl
; CHECK-NEXT:    fcmp s3, s2
; CHECK-NEXT:    shll v3.4s, v4.4h, #16
; CHECK-NEXT:    shll v4.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.4h, v1.h[3]
; CHECK-NEXT:    csetm w9, pl
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[4]
; CHECK-NEXT:    dup v6.8h, v0.h[4]
; CHECK-NEXT:    mov v2.h[1], w8
; CHECK-NEXT:    csetm w8, pl
; CHECK-NEXT:    fcmp s4, s3
; CHECK-NEXT:    shll v3.4s, v5.4h, #16
; CHECK-NEXT:    shll v4.4s, v6.4h, #16
; CHECK-NEXT:    dup v5.8h, v1.h[5]
; CHECK-NEXT:    dup v6.8h, v0.h[5]
; CHECK-NEXT:    mov v2.h[2], w8
1054; CHECK-NEXT: csetm w8, pl 1055; CHECK-NEXT: fcmp s4, s3 1056; CHECK-NEXT: shll v3.4s, v5.4h, #16 1057; CHECK-NEXT: shll v4.4s, v6.4h, #16 1058; CHECK-NEXT: dup v5.8h, v1.h[6] 1059; CHECK-NEXT: dup v6.8h, v0.h[6] 1060; CHECK-NEXT: dup v1.8h, v1.h[7] 1061; CHECK-NEXT: dup v0.8h, v0.h[7] 1062; CHECK-NEXT: mov v2.h[3], w8 1063; CHECK-NEXT: csetm w8, pl 1064; CHECK-NEXT: fcmp s4, s3 1065; CHECK-NEXT: shll v3.4s, v5.4h, #16 1066; CHECK-NEXT: shll v4.4s, v6.4h, #16 1067; CHECK-NEXT: shll v1.4s, v1.4h, #16 1068; CHECK-NEXT: shll v0.4s, v0.4h, #16 1069; CHECK-NEXT: mov v2.h[4], w8 1070; CHECK-NEXT: csetm w8, pl 1071; CHECK-NEXT: fcmp s4, s3 1072; CHECK-NEXT: mov v2.h[5], w8 1073; CHECK-NEXT: csetm w8, pl 1074; CHECK-NEXT: fcmp s0, s1 1075; CHECK-NEXT: mov v2.h[6], w8 1076; CHECK-NEXT: csetm w8, pl 1077; CHECK-NEXT: mov v2.h[7], w8 1078; CHECK-NEXT: xtn v0.8b, v2.8h 1079; CHECK-NEXT: ret 1080 %1 = fcmp uge <8 x bfloat> %a, %b 1081 ret <8 x i1> %1 1082} 1083 1084define <8 x i1> @test_fcmp_ult(<8 x bfloat> %a, <8 x bfloat> %b) #0 { 1085; CHECK-LABEL: test_fcmp_ult: 1086; CHECK: // %bb.0: 1087; CHECK-NEXT: dup v2.4h, v1.h[1] 1088; CHECK-NEXT: dup v3.4h, v0.h[1] 1089; CHECK-NEXT: dup v4.4h, v1.h[2] 1090; CHECK-NEXT: dup v5.4h, v0.h[2] 1091; CHECK-NEXT: dup v6.4h, v0.h[3] 1092; CHECK-NEXT: shll v2.4s, v2.4h, #16 1093; CHECK-NEXT: shll v3.4s, v3.4h, #16 1094; CHECK-NEXT: fcmp s3, s2 1095; CHECK-NEXT: shll v2.4s, v1.4h, #16 1096; CHECK-NEXT: shll v3.4s, v0.4h, #16 1097; CHECK-NEXT: csetm w8, lt 1098; CHECK-NEXT: fcmp s3, s2 1099; CHECK-NEXT: shll v3.4s, v4.4h, #16 1100; CHECK-NEXT: shll v4.4s, v5.4h, #16 1101; CHECK-NEXT: dup v5.4h, v1.h[3] 1102; CHECK-NEXT: csetm w9, lt 1103; CHECK-NEXT: fmov s2, w9 1104; CHECK-NEXT: fcmp s4, s3 1105; CHECK-NEXT: shll v4.4s, v6.4h, #16 1106; CHECK-NEXT: shll v3.4s, v5.4h, #16 1107; CHECK-NEXT: dup v5.8h, v1.h[4] 1108; CHECK-NEXT: dup v6.8h, v0.h[4] 1109; CHECK-NEXT: mov v2.h[1], w8 1110; CHECK-NEXT: csetm w8, lt 1111; CHECK-NEXT: fcmp s4, s3 
1112; CHECK-NEXT: shll v3.4s, v5.4h, #16 1113; CHECK-NEXT: shll v4.4s, v6.4h, #16 1114; CHECK-NEXT: dup v5.8h, v1.h[5] 1115; CHECK-NEXT: dup v6.8h, v0.h[5] 1116; CHECK-NEXT: mov v2.h[2], w8 1117; CHECK-NEXT: csetm w8, lt 1118; CHECK-NEXT: fcmp s4, s3 1119; CHECK-NEXT: shll v3.4s, v5.4h, #16 1120; CHECK-NEXT: shll v4.4s, v6.4h, #16 1121; CHECK-NEXT: dup v5.8h, v1.h[6] 1122; CHECK-NEXT: dup v6.8h, v0.h[6] 1123; CHECK-NEXT: dup v1.8h, v1.h[7] 1124; CHECK-NEXT: dup v0.8h, v0.h[7] 1125; CHECK-NEXT: mov v2.h[3], w8 1126; CHECK-NEXT: csetm w8, lt 1127; CHECK-NEXT: fcmp s4, s3 1128; CHECK-NEXT: shll v3.4s, v5.4h, #16 1129; CHECK-NEXT: shll v4.4s, v6.4h, #16 1130; CHECK-NEXT: shll v1.4s, v1.4h, #16 1131; CHECK-NEXT: shll v0.4s, v0.4h, #16 1132; CHECK-NEXT: mov v2.h[4], w8 1133; CHECK-NEXT: csetm w8, lt 1134; CHECK-NEXT: fcmp s4, s3 1135; CHECK-NEXT: mov v2.h[5], w8 1136; CHECK-NEXT: csetm w8, lt 1137; CHECK-NEXT: fcmp s0, s1 1138; CHECK-NEXT: mov v2.h[6], w8 1139; CHECK-NEXT: csetm w8, lt 1140; CHECK-NEXT: mov v2.h[7], w8 1141; CHECK-NEXT: xtn v0.8b, v2.8h 1142; CHECK-NEXT: ret 1143 %1 = fcmp ult <8 x bfloat> %a, %b 1144 ret <8 x i1> %1 1145} 1146 1147define <8 x i1> @test_fcmp_ule(<8 x bfloat> %a, <8 x bfloat> %b) #0 { 1148; CHECK-LABEL: test_fcmp_ule: 1149; CHECK: // %bb.0: 1150; CHECK-NEXT: dup v2.4h, v1.h[1] 1151; CHECK-NEXT: dup v3.4h, v0.h[1] 1152; CHECK-NEXT: dup v4.4h, v1.h[2] 1153; CHECK-NEXT: dup v5.4h, v0.h[2] 1154; CHECK-NEXT: dup v6.4h, v0.h[3] 1155; CHECK-NEXT: shll v2.4s, v2.4h, #16 1156; CHECK-NEXT: shll v3.4s, v3.4h, #16 1157; CHECK-NEXT: fcmp s3, s2 1158; CHECK-NEXT: shll v2.4s, v1.4h, #16 1159; CHECK-NEXT: shll v3.4s, v0.4h, #16 1160; CHECK-NEXT: csetm w8, le 1161; CHECK-NEXT: fcmp s3, s2 1162; CHECK-NEXT: shll v3.4s, v4.4h, #16 1163; CHECK-NEXT: shll v4.4s, v5.4h, #16 1164; CHECK-NEXT: dup v5.4h, v1.h[3] 1165; CHECK-NEXT: csetm w9, le 1166; CHECK-NEXT: fmov s2, w9 1167; CHECK-NEXT: fcmp s4, s3 1168; CHECK-NEXT: shll v4.4s, v6.4h, #16 1169; CHECK-NEXT: 
shll v3.4s, v5.4h, #16 1170; CHECK-NEXT: dup v5.8h, v1.h[4] 1171; CHECK-NEXT: dup v6.8h, v0.h[4] 1172; CHECK-NEXT: mov v2.h[1], w8 1173; CHECK-NEXT: csetm w8, le 1174; CHECK-NEXT: fcmp s4, s3 1175; CHECK-NEXT: shll v3.4s, v5.4h, #16 1176; CHECK-NEXT: shll v4.4s, v6.4h, #16 1177; CHECK-NEXT: dup v5.8h, v1.h[5] 1178; CHECK-NEXT: dup v6.8h, v0.h[5] 1179; CHECK-NEXT: mov v2.h[2], w8 1180; CHECK-NEXT: csetm w8, le 1181; CHECK-NEXT: fcmp s4, s3 1182; CHECK-NEXT: shll v3.4s, v5.4h, #16 1183; CHECK-NEXT: shll v4.4s, v6.4h, #16 1184; CHECK-NEXT: dup v5.8h, v1.h[6] 1185; CHECK-NEXT: dup v6.8h, v0.h[6] 1186; CHECK-NEXT: dup v1.8h, v1.h[7] 1187; CHECK-NEXT: dup v0.8h, v0.h[7] 1188; CHECK-NEXT: mov v2.h[3], w8 1189; CHECK-NEXT: csetm w8, le 1190; CHECK-NEXT: fcmp s4, s3 1191; CHECK-NEXT: shll v3.4s, v5.4h, #16 1192; CHECK-NEXT: shll v4.4s, v6.4h, #16 1193; CHECK-NEXT: shll v1.4s, v1.4h, #16 1194; CHECK-NEXT: shll v0.4s, v0.4h, #16 1195; CHECK-NEXT: mov v2.h[4], w8 1196; CHECK-NEXT: csetm w8, le 1197; CHECK-NEXT: fcmp s4, s3 1198; CHECK-NEXT: mov v2.h[5], w8 1199; CHECK-NEXT: csetm w8, le 1200; CHECK-NEXT: fcmp s0, s1 1201; CHECK-NEXT: mov v2.h[6], w8 1202; CHECK-NEXT: csetm w8, le 1203; CHECK-NEXT: mov v2.h[7], w8 1204; CHECK-NEXT: xtn v0.8b, v2.8h 1205; CHECK-NEXT: ret 1206 %1 = fcmp ule <8 x bfloat> %a, %b 1207 ret <8 x i1> %1 1208} 1209 1210define <8 x i1> @test_fcmp_uno(<8 x bfloat> %a, <8 x bfloat> %b) #0 { 1211; CHECK-LABEL: test_fcmp_uno: 1212; CHECK: // %bb.0: 1213; CHECK-NEXT: dup v2.4h, v1.h[1] 1214; CHECK-NEXT: dup v3.4h, v0.h[1] 1215; CHECK-NEXT: dup v4.4h, v1.h[2] 1216; CHECK-NEXT: dup v5.4h, v0.h[2] 1217; CHECK-NEXT: dup v6.4h, v0.h[3] 1218; CHECK-NEXT: shll v2.4s, v2.4h, #16 1219; CHECK-NEXT: shll v3.4s, v3.4h, #16 1220; CHECK-NEXT: fcmp s3, s2 1221; CHECK-NEXT: shll v2.4s, v1.4h, #16 1222; CHECK-NEXT: shll v3.4s, v0.4h, #16 1223; CHECK-NEXT: csetm w8, vs 1224; CHECK-NEXT: fcmp s3, s2 1225; CHECK-NEXT: shll v3.4s, v4.4h, #16 1226; CHECK-NEXT: shll v4.4s, v5.4h, 
#16 1227; CHECK-NEXT: dup v5.4h, v1.h[3] 1228; CHECK-NEXT: csetm w9, vs 1229; CHECK-NEXT: fmov s2, w9 1230; CHECK-NEXT: fcmp s4, s3 1231; CHECK-NEXT: shll v4.4s, v6.4h, #16 1232; CHECK-NEXT: shll v3.4s, v5.4h, #16 1233; CHECK-NEXT: dup v5.8h, v1.h[4] 1234; CHECK-NEXT: dup v6.8h, v0.h[4] 1235; CHECK-NEXT: mov v2.h[1], w8 1236; CHECK-NEXT: csetm w8, vs 1237; CHECK-NEXT: fcmp s4, s3 1238; CHECK-NEXT: shll v3.4s, v5.4h, #16 1239; CHECK-NEXT: shll v4.4s, v6.4h, #16 1240; CHECK-NEXT: dup v5.8h, v1.h[5] 1241; CHECK-NEXT: dup v6.8h, v0.h[5] 1242; CHECK-NEXT: mov v2.h[2], w8 1243; CHECK-NEXT: csetm w8, vs 1244; CHECK-NEXT: fcmp s4, s3 1245; CHECK-NEXT: shll v3.4s, v5.4h, #16 1246; CHECK-NEXT: shll v4.4s, v6.4h, #16 1247; CHECK-NEXT: dup v5.8h, v1.h[6] 1248; CHECK-NEXT: dup v6.8h, v0.h[6] 1249; CHECK-NEXT: dup v1.8h, v1.h[7] 1250; CHECK-NEXT: dup v0.8h, v0.h[7] 1251; CHECK-NEXT: mov v2.h[3], w8 1252; CHECK-NEXT: csetm w8, vs 1253; CHECK-NEXT: fcmp s4, s3 1254; CHECK-NEXT: shll v3.4s, v5.4h, #16 1255; CHECK-NEXT: shll v4.4s, v6.4h, #16 1256; CHECK-NEXT: shll v1.4s, v1.4h, #16 1257; CHECK-NEXT: shll v0.4s, v0.4h, #16 1258; CHECK-NEXT: mov v2.h[4], w8 1259; CHECK-NEXT: csetm w8, vs 1260; CHECK-NEXT: fcmp s4, s3 1261; CHECK-NEXT: mov v2.h[5], w8 1262; CHECK-NEXT: csetm w8, vs 1263; CHECK-NEXT: fcmp s0, s1 1264; CHECK-NEXT: mov v2.h[6], w8 1265; CHECK-NEXT: csetm w8, vs 1266; CHECK-NEXT: mov v2.h[7], w8 1267; CHECK-NEXT: xtn v0.8b, v2.8h 1268; CHECK-NEXT: ret 1269 %1 = fcmp uno <8 x bfloat> %a, %b 1270 ret <8 x i1> %1 1271} 1272 1273define <8 x i1> @test_fcmp_one(<8 x bfloat> %a, <8 x bfloat> %b) #0 { 1274; CHECK-LABEL: test_fcmp_one: 1275; CHECK: // %bb.0: 1276; CHECK-NEXT: dup v2.4h, v1.h[1] 1277; CHECK-NEXT: dup v3.4h, v0.h[1] 1278; CHECK-NEXT: dup v4.4h, v1.h[2] 1279; CHECK-NEXT: dup v5.4h, v0.h[2] 1280; CHECK-NEXT: dup v6.4h, v0.h[3] 1281; CHECK-NEXT: shll v2.4s, v2.4h, #16 1282; CHECK-NEXT: shll v3.4s, v3.4h, #16 1283; CHECK-NEXT: fcmp s3, s2 1284; CHECK-NEXT: shll v2.4s, 
v1.4h, #16 1285; CHECK-NEXT: shll v3.4s, v0.4h, #16 1286; CHECK-NEXT: csetm w8, mi 1287; CHECK-NEXT: csinv w8, w8, wzr, le 1288; CHECK-NEXT: fcmp s3, s2 1289; CHECK-NEXT: shll v3.4s, v4.4h, #16 1290; CHECK-NEXT: shll v4.4s, v5.4h, #16 1291; CHECK-NEXT: dup v5.4h, v1.h[3] 1292; CHECK-NEXT: csetm w9, mi 1293; CHECK-NEXT: csinv w9, w9, wzr, le 1294; CHECK-NEXT: fcmp s4, s3 1295; CHECK-NEXT: shll v4.4s, v6.4h, #16 1296; CHECK-NEXT: fmov s2, w9 1297; CHECK-NEXT: shll v3.4s, v5.4h, #16 1298; CHECK-NEXT: dup v5.8h, v1.h[4] 1299; CHECK-NEXT: dup v6.8h, v0.h[4] 1300; CHECK-NEXT: mov v2.h[1], w8 1301; CHECK-NEXT: csetm w8, mi 1302; CHECK-NEXT: csinv w8, w8, wzr, le 1303; CHECK-NEXT: fcmp s4, s3 1304; CHECK-NEXT: shll v3.4s, v5.4h, #16 1305; CHECK-NEXT: shll v4.4s, v6.4h, #16 1306; CHECK-NEXT: dup v5.8h, v1.h[5] 1307; CHECK-NEXT: dup v6.8h, v0.h[5] 1308; CHECK-NEXT: mov v2.h[2], w8 1309; CHECK-NEXT: csetm w8, mi 1310; CHECK-NEXT: csinv w8, w8, wzr, le 1311; CHECK-NEXT: fcmp s4, s3 1312; CHECK-NEXT: shll v3.4s, v5.4h, #16 1313; CHECK-NEXT: shll v4.4s, v6.4h, #16 1314; CHECK-NEXT: dup v5.8h, v1.h[6] 1315; CHECK-NEXT: dup v6.8h, v0.h[6] 1316; CHECK-NEXT: dup v1.8h, v1.h[7] 1317; CHECK-NEXT: dup v0.8h, v0.h[7] 1318; CHECK-NEXT: mov v2.h[3], w8 1319; CHECK-NEXT: csetm w8, mi 1320; CHECK-NEXT: csinv w8, w8, wzr, le 1321; CHECK-NEXT: fcmp s4, s3 1322; CHECK-NEXT: shll v3.4s, v5.4h, #16 1323; CHECK-NEXT: shll v4.4s, v6.4h, #16 1324; CHECK-NEXT: shll v1.4s, v1.4h, #16 1325; CHECK-NEXT: shll v0.4s, v0.4h, #16 1326; CHECK-NEXT: mov v2.h[4], w8 1327; CHECK-NEXT: csetm w8, mi 1328; CHECK-NEXT: csinv w8, w8, wzr, le 1329; CHECK-NEXT: fcmp s4, s3 1330; CHECK-NEXT: mov v2.h[5], w8 1331; CHECK-NEXT: csetm w8, mi 1332; CHECK-NEXT: csinv w8, w8, wzr, le 1333; CHECK-NEXT: fcmp s0, s1 1334; CHECK-NEXT: mov v2.h[6], w8 1335; CHECK-NEXT: csetm w8, mi 1336; CHECK-NEXT: csinv w8, w8, wzr, le 1337; CHECK-NEXT: mov v2.h[7], w8 1338; CHECK-NEXT: xtn v0.8b, v2.8h 1339; CHECK-NEXT: ret 1340 %1 = fcmp one 
<8 x bfloat> %a, %b 1341 ret <8 x i1> %1 1342} 1343 1344define <8 x i1> @test_fcmp_oeq(<8 x bfloat> %a, <8 x bfloat> %b) #0 { 1345; CHECK-LABEL: test_fcmp_oeq: 1346; CHECK: // %bb.0: 1347; CHECK-NEXT: dup v2.4h, v1.h[1] 1348; CHECK-NEXT: dup v3.4h, v0.h[1] 1349; CHECK-NEXT: dup v4.4h, v1.h[2] 1350; CHECK-NEXT: dup v5.4h, v0.h[2] 1351; CHECK-NEXT: dup v6.4h, v0.h[3] 1352; CHECK-NEXT: shll v2.4s, v2.4h, #16 1353; CHECK-NEXT: shll v3.4s, v3.4h, #16 1354; CHECK-NEXT: fcmp s3, s2 1355; CHECK-NEXT: shll v2.4s, v1.4h, #16 1356; CHECK-NEXT: shll v3.4s, v0.4h, #16 1357; CHECK-NEXT: csetm w8, eq 1358; CHECK-NEXT: fcmp s3, s2 1359; CHECK-NEXT: shll v3.4s, v4.4h, #16 1360; CHECK-NEXT: shll v4.4s, v5.4h, #16 1361; CHECK-NEXT: dup v5.4h, v1.h[3] 1362; CHECK-NEXT: csetm w9, eq 1363; CHECK-NEXT: fmov s2, w9 1364; CHECK-NEXT: fcmp s4, s3 1365; CHECK-NEXT: shll v4.4s, v6.4h, #16 1366; CHECK-NEXT: shll v3.4s, v5.4h, #16 1367; CHECK-NEXT: dup v5.8h, v1.h[4] 1368; CHECK-NEXT: dup v6.8h, v0.h[4] 1369; CHECK-NEXT: mov v2.h[1], w8 1370; CHECK-NEXT: csetm w8, eq 1371; CHECK-NEXT: fcmp s4, s3 1372; CHECK-NEXT: shll v3.4s, v5.4h, #16 1373; CHECK-NEXT: shll v4.4s, v6.4h, #16 1374; CHECK-NEXT: dup v5.8h, v1.h[5] 1375; CHECK-NEXT: dup v6.8h, v0.h[5] 1376; CHECK-NEXT: mov v2.h[2], w8 1377; CHECK-NEXT: csetm w8, eq 1378; CHECK-NEXT: fcmp s4, s3 1379; CHECK-NEXT: shll v3.4s, v5.4h, #16 1380; CHECK-NEXT: shll v4.4s, v6.4h, #16 1381; CHECK-NEXT: dup v5.8h, v1.h[6] 1382; CHECK-NEXT: dup v6.8h, v0.h[6] 1383; CHECK-NEXT: dup v1.8h, v1.h[7] 1384; CHECK-NEXT: dup v0.8h, v0.h[7] 1385; CHECK-NEXT: mov v2.h[3], w8 1386; CHECK-NEXT: csetm w8, eq 1387; CHECK-NEXT: fcmp s4, s3 1388; CHECK-NEXT: shll v3.4s, v5.4h, #16 1389; CHECK-NEXT: shll v4.4s, v6.4h, #16 1390; CHECK-NEXT: shll v1.4s, v1.4h, #16 1391; CHECK-NEXT: shll v0.4s, v0.4h, #16 1392; CHECK-NEXT: mov v2.h[4], w8 1393; CHECK-NEXT: csetm w8, eq 1394; CHECK-NEXT: fcmp s4, s3 1395; CHECK-NEXT: mov v2.h[5], w8 1396; CHECK-NEXT: csetm w8, eq 1397; 
CHECK-NEXT: fcmp s0, s1 1398; CHECK-NEXT: mov v2.h[6], w8 1399; CHECK-NEXT: csetm w8, eq 1400; CHECK-NEXT: mov v2.h[7], w8 1401; CHECK-NEXT: xtn v0.8b, v2.8h 1402; CHECK-NEXT: ret 1403 %1 = fcmp oeq <8 x bfloat> %a, %b 1404 ret <8 x i1> %1 1405} 1406 1407define <8 x i1> @test_fcmp_ogt(<8 x bfloat> %a, <8 x bfloat> %b) #0 { 1408; CHECK-LABEL: test_fcmp_ogt: 1409; CHECK: // %bb.0: 1410; CHECK-NEXT: dup v2.4h, v1.h[1] 1411; CHECK-NEXT: dup v3.4h, v0.h[1] 1412; CHECK-NEXT: dup v4.4h, v1.h[2] 1413; CHECK-NEXT: dup v5.4h, v0.h[2] 1414; CHECK-NEXT: dup v6.4h, v0.h[3] 1415; CHECK-NEXT: shll v2.4s, v2.4h, #16 1416; CHECK-NEXT: shll v3.4s, v3.4h, #16 1417; CHECK-NEXT: fcmp s3, s2 1418; CHECK-NEXT: shll v2.4s, v1.4h, #16 1419; CHECK-NEXT: shll v3.4s, v0.4h, #16 1420; CHECK-NEXT: csetm w8, gt 1421; CHECK-NEXT: fcmp s3, s2 1422; CHECK-NEXT: shll v3.4s, v4.4h, #16 1423; CHECK-NEXT: shll v4.4s, v5.4h, #16 1424; CHECK-NEXT: dup v5.4h, v1.h[3] 1425; CHECK-NEXT: csetm w9, gt 1426; CHECK-NEXT: fmov s2, w9 1427; CHECK-NEXT: fcmp s4, s3 1428; CHECK-NEXT: shll v4.4s, v6.4h, #16 1429; CHECK-NEXT: shll v3.4s, v5.4h, #16 1430; CHECK-NEXT: dup v5.8h, v1.h[4] 1431; CHECK-NEXT: dup v6.8h, v0.h[4] 1432; CHECK-NEXT: mov v2.h[1], w8 1433; CHECK-NEXT: csetm w8, gt 1434; CHECK-NEXT: fcmp s4, s3 1435; CHECK-NEXT: shll v3.4s, v5.4h, #16 1436; CHECK-NEXT: shll v4.4s, v6.4h, #16 1437; CHECK-NEXT: dup v5.8h, v1.h[5] 1438; CHECK-NEXT: dup v6.8h, v0.h[5] 1439; CHECK-NEXT: mov v2.h[2], w8 1440; CHECK-NEXT: csetm w8, gt 1441; CHECK-NEXT: fcmp s4, s3 1442; CHECK-NEXT: shll v3.4s, v5.4h, #16 1443; CHECK-NEXT: shll v4.4s, v6.4h, #16 1444; CHECK-NEXT: dup v5.8h, v1.h[6] 1445; CHECK-NEXT: dup v6.8h, v0.h[6] 1446; CHECK-NEXT: dup v1.8h, v1.h[7] 1447; CHECK-NEXT: dup v0.8h, v0.h[7] 1448; CHECK-NEXT: mov v2.h[3], w8 1449; CHECK-NEXT: csetm w8, gt 1450; CHECK-NEXT: fcmp s4, s3 1451; CHECK-NEXT: shll v3.4s, v5.4h, #16 1452; CHECK-NEXT: shll v4.4s, v6.4h, #16 1453; CHECK-NEXT: shll v1.4s, v1.4h, #16 1454; CHECK-NEXT: 
shll v0.4s, v0.4h, #16 1455; CHECK-NEXT: mov v2.h[4], w8 1456; CHECK-NEXT: csetm w8, gt 1457; CHECK-NEXT: fcmp s4, s3 1458; CHECK-NEXT: mov v2.h[5], w8 1459; CHECK-NEXT: csetm w8, gt 1460; CHECK-NEXT: fcmp s0, s1 1461; CHECK-NEXT: mov v2.h[6], w8 1462; CHECK-NEXT: csetm w8, gt 1463; CHECK-NEXT: mov v2.h[7], w8 1464; CHECK-NEXT: xtn v0.8b, v2.8h 1465; CHECK-NEXT: ret 1466 %1 = fcmp ogt <8 x bfloat> %a, %b 1467 ret <8 x i1> %1 1468} 1469 1470define <8 x i1> @test_fcmp_oge(<8 x bfloat> %a, <8 x bfloat> %b) #0 { 1471; CHECK-LABEL: test_fcmp_oge: 1472; CHECK: // %bb.0: 1473; CHECK-NEXT: dup v2.4h, v1.h[1] 1474; CHECK-NEXT: dup v3.4h, v0.h[1] 1475; CHECK-NEXT: dup v4.4h, v1.h[2] 1476; CHECK-NEXT: dup v5.4h, v0.h[2] 1477; CHECK-NEXT: dup v6.4h, v0.h[3] 1478; CHECK-NEXT: shll v2.4s, v2.4h, #16 1479; CHECK-NEXT: shll v3.4s, v3.4h, #16 1480; CHECK-NEXT: fcmp s3, s2 1481; CHECK-NEXT: shll v2.4s, v1.4h, #16 1482; CHECK-NEXT: shll v3.4s, v0.4h, #16 1483; CHECK-NEXT: csetm w8, ge 1484; CHECK-NEXT: fcmp s3, s2 1485; CHECK-NEXT: shll v3.4s, v4.4h, #16 1486; CHECK-NEXT: shll v4.4s, v5.4h, #16 1487; CHECK-NEXT: dup v5.4h, v1.h[3] 1488; CHECK-NEXT: csetm w9, ge 1489; CHECK-NEXT: fmov s2, w9 1490; CHECK-NEXT: fcmp s4, s3 1491; CHECK-NEXT: shll v4.4s, v6.4h, #16 1492; CHECK-NEXT: shll v3.4s, v5.4h, #16 1493; CHECK-NEXT: dup v5.8h, v1.h[4] 1494; CHECK-NEXT: dup v6.8h, v0.h[4] 1495; CHECK-NEXT: mov v2.h[1], w8 1496; CHECK-NEXT: csetm w8, ge 1497; CHECK-NEXT: fcmp s4, s3 1498; CHECK-NEXT: shll v3.4s, v5.4h, #16 1499; CHECK-NEXT: shll v4.4s, v6.4h, #16 1500; CHECK-NEXT: dup v5.8h, v1.h[5] 1501; CHECK-NEXT: dup v6.8h, v0.h[5] 1502; CHECK-NEXT: mov v2.h[2], w8 1503; CHECK-NEXT: csetm w8, ge 1504; CHECK-NEXT: fcmp s4, s3 1505; CHECK-NEXT: shll v3.4s, v5.4h, #16 1506; CHECK-NEXT: shll v4.4s, v6.4h, #16 1507; CHECK-NEXT: dup v5.8h, v1.h[6] 1508; CHECK-NEXT: dup v6.8h, v0.h[6] 1509; CHECK-NEXT: dup v1.8h, v1.h[7] 1510; CHECK-NEXT: dup v0.8h, v0.h[7] 1511; CHECK-NEXT: mov v2.h[3], w8 1512; 
CHECK-NEXT: csetm w8, ge 1513; CHECK-NEXT: fcmp s4, s3 1514; CHECK-NEXT: shll v3.4s, v5.4h, #16 1515; CHECK-NEXT: shll v4.4s, v6.4h, #16 1516; CHECK-NEXT: shll v1.4s, v1.4h, #16 1517; CHECK-NEXT: shll v0.4s, v0.4h, #16 1518; CHECK-NEXT: mov v2.h[4], w8 1519; CHECK-NEXT: csetm w8, ge 1520; CHECK-NEXT: fcmp s4, s3 1521; CHECK-NEXT: mov v2.h[5], w8 1522; CHECK-NEXT: csetm w8, ge 1523; CHECK-NEXT: fcmp s0, s1 1524; CHECK-NEXT: mov v2.h[6], w8 1525; CHECK-NEXT: csetm w8, ge 1526; CHECK-NEXT: mov v2.h[7], w8 1527; CHECK-NEXT: xtn v0.8b, v2.8h 1528; CHECK-NEXT: ret 1529 %1 = fcmp oge <8 x bfloat> %a, %b 1530 ret <8 x i1> %1 1531} 1532 1533define <8 x i1> @test_fcmp_olt(<8 x bfloat> %a, <8 x bfloat> %b) #0 { 1534; CHECK-LABEL: test_fcmp_olt: 1535; CHECK: // %bb.0: 1536; CHECK-NEXT: dup v2.4h, v1.h[1] 1537; CHECK-NEXT: dup v3.4h, v0.h[1] 1538; CHECK-NEXT: dup v4.4h, v1.h[2] 1539; CHECK-NEXT: dup v5.4h, v0.h[2] 1540; CHECK-NEXT: dup v6.4h, v0.h[3] 1541; CHECK-NEXT: shll v2.4s, v2.4h, #16 1542; CHECK-NEXT: shll v3.4s, v3.4h, #16 1543; CHECK-NEXT: fcmp s3, s2 1544; CHECK-NEXT: shll v2.4s, v1.4h, #16 1545; CHECK-NEXT: shll v3.4s, v0.4h, #16 1546; CHECK-NEXT: csetm w8, mi 1547; CHECK-NEXT: fcmp s3, s2 1548; CHECK-NEXT: shll v3.4s, v4.4h, #16 1549; CHECK-NEXT: shll v4.4s, v5.4h, #16 1550; CHECK-NEXT: dup v5.4h, v1.h[3] 1551; CHECK-NEXT: csetm w9, mi 1552; CHECK-NEXT: fmov s2, w9 1553; CHECK-NEXT: fcmp s4, s3 1554; CHECK-NEXT: shll v4.4s, v6.4h, #16 1555; CHECK-NEXT: shll v3.4s, v5.4h, #16 1556; CHECK-NEXT: dup v5.8h, v1.h[4] 1557; CHECK-NEXT: dup v6.8h, v0.h[4] 1558; CHECK-NEXT: mov v2.h[1], w8 1559; CHECK-NEXT: csetm w8, mi 1560; CHECK-NEXT: fcmp s4, s3 1561; CHECK-NEXT: shll v3.4s, v5.4h, #16 1562; CHECK-NEXT: shll v4.4s, v6.4h, #16 1563; CHECK-NEXT: dup v5.8h, v1.h[5] 1564; CHECK-NEXT: dup v6.8h, v0.h[5] 1565; CHECK-NEXT: mov v2.h[2], w8 1566; CHECK-NEXT: csetm w8, mi 1567; CHECK-NEXT: fcmp s4, s3 1568; CHECK-NEXT: shll v3.4s, v5.4h, #16 1569; CHECK-NEXT: shll v4.4s, v6.4h, 
#16 1570; CHECK-NEXT: dup v5.8h, v1.h[6] 1571; CHECK-NEXT: dup v6.8h, v0.h[6] 1572; CHECK-NEXT: dup v1.8h, v1.h[7] 1573; CHECK-NEXT: dup v0.8h, v0.h[7] 1574; CHECK-NEXT: mov v2.h[3], w8 1575; CHECK-NEXT: csetm w8, mi 1576; CHECK-NEXT: fcmp s4, s3 1577; CHECK-NEXT: shll v3.4s, v5.4h, #16 1578; CHECK-NEXT: shll v4.4s, v6.4h, #16 1579; CHECK-NEXT: shll v1.4s, v1.4h, #16 1580; CHECK-NEXT: shll v0.4s, v0.4h, #16 1581; CHECK-NEXT: mov v2.h[4], w8 1582; CHECK-NEXT: csetm w8, mi 1583; CHECK-NEXT: fcmp s4, s3 1584; CHECK-NEXT: mov v2.h[5], w8 1585; CHECK-NEXT: csetm w8, mi 1586; CHECK-NEXT: fcmp s0, s1 1587; CHECK-NEXT: mov v2.h[6], w8 1588; CHECK-NEXT: csetm w8, mi 1589; CHECK-NEXT: mov v2.h[7], w8 1590; CHECK-NEXT: xtn v0.8b, v2.8h 1591; CHECK-NEXT: ret 1592 %1 = fcmp olt <8 x bfloat> %a, %b 1593 ret <8 x i1> %1 1594} 1595 1596define <8 x i1> @test_fcmp_ole(<8 x bfloat> %a, <8 x bfloat> %b) #0 { 1597; CHECK-LABEL: test_fcmp_ole: 1598; CHECK: // %bb.0: 1599; CHECK-NEXT: dup v2.4h, v1.h[1] 1600; CHECK-NEXT: dup v3.4h, v0.h[1] 1601; CHECK-NEXT: dup v4.4h, v1.h[2] 1602; CHECK-NEXT: dup v5.4h, v0.h[2] 1603; CHECK-NEXT: dup v6.4h, v0.h[3] 1604; CHECK-NEXT: shll v2.4s, v2.4h, #16 1605; CHECK-NEXT: shll v3.4s, v3.4h, #16 1606; CHECK-NEXT: fcmp s3, s2 1607; CHECK-NEXT: shll v2.4s, v1.4h, #16 1608; CHECK-NEXT: shll v3.4s, v0.4h, #16 1609; CHECK-NEXT: csetm w8, ls 1610; CHECK-NEXT: fcmp s3, s2 1611; CHECK-NEXT: shll v3.4s, v4.4h, #16 1612; CHECK-NEXT: shll v4.4s, v5.4h, #16 1613; CHECK-NEXT: dup v5.4h, v1.h[3] 1614; CHECK-NEXT: csetm w9, ls 1615; CHECK-NEXT: fmov s2, w9 1616; CHECK-NEXT: fcmp s4, s3 1617; CHECK-NEXT: shll v4.4s, v6.4h, #16 1618; CHECK-NEXT: shll v3.4s, v5.4h, #16 1619; CHECK-NEXT: dup v5.8h, v1.h[4] 1620; CHECK-NEXT: dup v6.8h, v0.h[4] 1621; CHECK-NEXT: mov v2.h[1], w8 1622; CHECK-NEXT: csetm w8, ls 1623; CHECK-NEXT: fcmp s4, s3 1624; CHECK-NEXT: shll v3.4s, v5.4h, #16 1625; CHECK-NEXT: shll v4.4s, v6.4h, #16 1626; CHECK-NEXT: dup v5.8h, v1.h[5] 1627; CHECK-NEXT: 
dup v6.8h, v0.h[5] 1628; CHECK-NEXT: mov v2.h[2], w8 1629; CHECK-NEXT: csetm w8, ls 1630; CHECK-NEXT: fcmp s4, s3 1631; CHECK-NEXT: shll v3.4s, v5.4h, #16 1632; CHECK-NEXT: shll v4.4s, v6.4h, #16 1633; CHECK-NEXT: dup v5.8h, v1.h[6] 1634; CHECK-NEXT: dup v6.8h, v0.h[6] 1635; CHECK-NEXT: dup v1.8h, v1.h[7] 1636; CHECK-NEXT: dup v0.8h, v0.h[7] 1637; CHECK-NEXT: mov v2.h[3], w8 1638; CHECK-NEXT: csetm w8, ls 1639; CHECK-NEXT: fcmp s4, s3 1640; CHECK-NEXT: shll v3.4s, v5.4h, #16 1641; CHECK-NEXT: shll v4.4s, v6.4h, #16 1642; CHECK-NEXT: shll v1.4s, v1.4h, #16 1643; CHECK-NEXT: shll v0.4s, v0.4h, #16 1644; CHECK-NEXT: mov v2.h[4], w8 1645; CHECK-NEXT: csetm w8, ls 1646; CHECK-NEXT: fcmp s4, s3 1647; CHECK-NEXT: mov v2.h[5], w8 1648; CHECK-NEXT: csetm w8, ls 1649; CHECK-NEXT: fcmp s0, s1 1650; CHECK-NEXT: mov v2.h[6], w8 1651; CHECK-NEXT: csetm w8, ls 1652; CHECK-NEXT: mov v2.h[7], w8 1653; CHECK-NEXT: xtn v0.8b, v2.8h 1654; CHECK-NEXT: ret 1655 %1 = fcmp ole <8 x bfloat> %a, %b 1656 ret <8 x i1> %1 1657} 1658 1659define <8 x i1> @test_fcmp_ord(<8 x bfloat> %a, <8 x bfloat> %b) #0 { 1660; CHECK-LABEL: test_fcmp_ord: 1661; CHECK: // %bb.0: 1662; CHECK-NEXT: dup v2.4h, v1.h[1] 1663; CHECK-NEXT: dup v3.4h, v0.h[1] 1664; CHECK-NEXT: dup v4.4h, v1.h[2] 1665; CHECK-NEXT: dup v5.4h, v0.h[2] 1666; CHECK-NEXT: dup v6.4h, v0.h[3] 1667; CHECK-NEXT: shll v2.4s, v2.4h, #16 1668; CHECK-NEXT: shll v3.4s, v3.4h, #16 1669; CHECK-NEXT: fcmp s3, s2 1670; CHECK-NEXT: shll v2.4s, v1.4h, #16 1671; CHECK-NEXT: shll v3.4s, v0.4h, #16 1672; CHECK-NEXT: csetm w8, vc 1673; CHECK-NEXT: fcmp s3, s2 1674; CHECK-NEXT: shll v3.4s, v4.4h, #16 1675; CHECK-NEXT: shll v4.4s, v5.4h, #16 1676; CHECK-NEXT: dup v5.4h, v1.h[3] 1677; CHECK-NEXT: csetm w9, vc 1678; CHECK-NEXT: fmov s2, w9 1679; CHECK-NEXT: fcmp s4, s3 1680; CHECK-NEXT: shll v4.4s, v6.4h, #16 1681; CHECK-NEXT: shll v3.4s, v5.4h, #16 1682; CHECK-NEXT: dup v5.8h, v1.h[4] 1683; CHECK-NEXT: dup v6.8h, v0.h[4] 1684; CHECK-NEXT: mov v2.h[1], w8 1685; 
CHECK-NEXT: csetm w8, vc 1686; CHECK-NEXT: fcmp s4, s3 1687; CHECK-NEXT: shll v3.4s, v5.4h, #16 1688; CHECK-NEXT: shll v4.4s, v6.4h, #16 1689; CHECK-NEXT: dup v5.8h, v1.h[5] 1690; CHECK-NEXT: dup v6.8h, v0.h[5] 1691; CHECK-NEXT: mov v2.h[2], w8 1692; CHECK-NEXT: csetm w8, vc 1693; CHECK-NEXT: fcmp s4, s3 1694; CHECK-NEXT: shll v3.4s, v5.4h, #16 1695; CHECK-NEXT: shll v4.4s, v6.4h, #16 1696; CHECK-NEXT: dup v5.8h, v1.h[6] 1697; CHECK-NEXT: dup v6.8h, v0.h[6] 1698; CHECK-NEXT: dup v1.8h, v1.h[7] 1699; CHECK-NEXT: dup v0.8h, v0.h[7] 1700; CHECK-NEXT: mov v2.h[3], w8 1701; CHECK-NEXT: csetm w8, vc 1702; CHECK-NEXT: fcmp s4, s3 1703; CHECK-NEXT: shll v3.4s, v5.4h, #16 1704; CHECK-NEXT: shll v4.4s, v6.4h, #16 1705; CHECK-NEXT: shll v1.4s, v1.4h, #16 1706; CHECK-NEXT: shll v0.4s, v0.4h, #16 1707; CHECK-NEXT: mov v2.h[4], w8 1708; CHECK-NEXT: csetm w8, vc 1709; CHECK-NEXT: fcmp s4, s3 1710; CHECK-NEXT: mov v2.h[5], w8 1711; CHECK-NEXT: csetm w8, vc 1712; CHECK-NEXT: fcmp s0, s1 1713; CHECK-NEXT: mov v2.h[6], w8 1714; CHECK-NEXT: csetm w8, vc 1715; CHECK-NEXT: mov v2.h[7], w8 1716; CHECK-NEXT: xtn v0.8b, v2.8h 1717; CHECK-NEXT: ret 1718 %1 = fcmp ord <8 x bfloat> %a, %b 1719 ret <8 x i1> %1 1720} 1721 1722attributes #0 = { nounwind } 1723