; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64 -mattr=-bf16 | FileCheck %s --check-prefixes=CHECK,CHECK-CVT
; RUN: llc < %s -mtriple=aarch64 -mattr=+bf16 | FileCheck %s --check-prefixes=CHECK,CHECK-BF16

; Code generation tests for <4 x bfloat> operations on AArch64.
;
; The file is compiled twice, once with -mattr=-bf16 and once with
; -mattr=+bf16. Functions whose expected assembly is the same in both
; configurations are checked with the shared prefix; functions that narrow a
; .4s value to bfloat have one set of assertions per configuration.
;
; The assertions below are generated output. After a codegen change,
; regenerate them with the script named on the first line rather than editing
; them by hand.

; fadd on <4 x bfloat>. Both configurations widen the operands with shll #16
; and add in .4s. Without bf16 the result is narrowed by an integer
; movi/ushr/and/add/addhn sequence; with bf16 it is narrowed by bfcvtn.
define <4 x bfloat> @add_h(<4 x bfloat> %a, <4 x bfloat> %b) {
; CHECK-CVT-LABEL: add_h:
; CHECK-CVT:       // %bb.0: // %entry
; CHECK-CVT-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-CVT-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-CVT-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECK-CVT-NEXT:    movi v1.4s, #1
; CHECK-CVT-NEXT:    ushr v2.4s, v0.4s, #16
; CHECK-CVT-NEXT:    and v1.16b, v2.16b, v1.16b
; CHECK-CVT-NEXT:    movi v2.4s, #127, msl #8
; CHECK-CVT-NEXT:    add v0.4s, v1.4s, v0.4s
; CHECK-CVT-NEXT:    addhn v0.4h, v0.4s, v2.4s
; CHECK-CVT-NEXT:    ret
;
; CHECK-BF16-LABEL: add_h:
; CHECK-BF16:       // %bb.0: // %entry
; CHECK-BF16-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-BF16-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-BF16-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
; CHECK-BF16-NEXT:    ret
entry:

  %0 = fadd <4 x bfloat> %a, %b
  ret <4 x bfloat> %0
}


; Returns a constant splat of bfloat 0xR3CCD. Expected as a mov of #15565
; (0x3ccd) into w8 followed by a dup to all four lanes. The %a argument is
; not used by the body.
define <4 x bfloat> @build_h4(<4 x bfloat> %a) {
; CHECK-LABEL: build_h4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov w8, #15565 // =0x3ccd
; CHECK-NEXT:    dup v0.4h, w8
; CHECK-NEXT:    ret
entry:
  ret <4 x bfloat> <bfloat 0xR3CCD, bfloat 0xR3CCD, bfloat 0xR3CCD, bfloat 0xR3CCD>
}


; fsub on <4 x bfloat>. Same expected shape as add_h, with fsub as the .4s
; operation.
define <4 x bfloat> @sub_h(<4 x bfloat> %a, <4 x bfloat> %b) {
; CHECK-CVT-LABEL: sub_h:
; CHECK-CVT:       // %bb.0: // %entry
; CHECK-CVT-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-CVT-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-CVT-NEXT:    fsub v0.4s, v0.4s, v1.4s
; CHECK-CVT-NEXT:    movi v1.4s, #1
; CHECK-CVT-NEXT:    ushr v2.4s, v0.4s, #16
; CHECK-CVT-NEXT:    and v1.16b, v2.16b, v1.16b
; CHECK-CVT-NEXT:    movi v2.4s, #127, msl #8
; CHECK-CVT-NEXT:    add v0.4s, v1.4s, v0.4s
; CHECK-CVT-NEXT:    addhn v0.4h, v0.4s, v2.4s
; CHECK-CVT-NEXT:    ret
;
; CHECK-BF16-LABEL: sub_h:
; CHECK-BF16:       // %bb.0: // %entry
; CHECK-BF16-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-BF16-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-BF16-NEXT:    fsub v0.4s, v0.4s, v1.4s
; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
; CHECK-BF16-NEXT:    ret
entry:

  %0 = fsub <4 x bfloat> %a, %b
  ret <4 x bfloat> %0
}


; fmul on <4 x bfloat>. Same expected shape as add_h, with fmul as the .4s
; operation.
define <4 x bfloat> @mul_h(<4 x bfloat> %a, <4 x bfloat> %b) {
; CHECK-CVT-LABEL: mul_h:
; CHECK-CVT:       // %bb.0: // %entry
; CHECK-CVT-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-CVT-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-CVT-NEXT:    fmul v0.4s, v0.4s, v1.4s
; CHECK-CVT-NEXT:    movi v1.4s, #1
; CHECK-CVT-NEXT:    ushr v2.4s, v0.4s, #16
; CHECK-CVT-NEXT:    and v1.16b, v2.16b, v1.16b
; CHECK-CVT-NEXT:    movi v2.4s, #127, msl #8
; CHECK-CVT-NEXT:    add v0.4s, v1.4s, v0.4s
; CHECK-CVT-NEXT:    addhn v0.4h, v0.4s, v2.4s
; CHECK-CVT-NEXT:    ret
;
; CHECK-BF16-LABEL: mul_h:
; CHECK-BF16:       // %bb.0: // %entry
; CHECK-BF16-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-BF16-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-BF16-NEXT:    fmul v0.4s, v0.4s, v1.4s
; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
; CHECK-BF16-NEXT:    ret
entry:

  %0 = fmul <4 x bfloat> %a, %b
  ret <4 x bfloat> %0
}


; fdiv on <4 x bfloat>. Same expected shape as add_h, with fdiv as the .4s
; operation.
define <4 x bfloat> @div_h(<4 x bfloat> %a, <4 x bfloat> %b) {
; CHECK-CVT-LABEL: div_h:
; CHECK-CVT:       // %bb.0: // %entry
; CHECK-CVT-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-CVT-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-CVT-NEXT:    fdiv v0.4s, v0.4s, v1.4s
; CHECK-CVT-NEXT:    movi v1.4s, #1
; CHECK-CVT-NEXT:    ushr v2.4s, v0.4s, #16
; CHECK-CVT-NEXT:    and v1.16b, v2.16b, v1.16b
; CHECK-CVT-NEXT:    movi v2.4s, #127, msl #8
; CHECK-CVT-NEXT:    add v0.4s, v1.4s, v0.4s
; CHECK-CVT-NEXT:    addhn v0.4h, v0.4s, v2.4s
; CHECK-CVT-NEXT:    ret
;
; CHECK-BF16-LABEL: div_h:
; CHECK-BF16:       // %bb.0: // %entry
; CHECK-BF16-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-BF16-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-BF16-NEXT:    fdiv v0.4s, v0.4s, v1.4s
; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
; CHECK-BF16-NEXT:    ret
entry:

  %0 = fdiv <4 x bfloat> %a, %b
  ret <4 x bfloat> %0
}


; Loads a <4 x bfloat> (align 4) and returns it. Expected as a single ldr of
; d0 from [x0].
define <4 x bfloat> @load_h(ptr %a) {
; CHECK-LABEL: load_h:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ret
entry:
  %0 = load <4 x bfloat>, ptr %a, align 4
  ret <4 x bfloat> %0
}


; Stores a <4 x bfloat> (align 4). Expected as a single str of d0 to [x0].
define void @store_h(ptr %a, <4 x bfloat> %b) {
; CHECK-LABEL: store_h:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
entry:
  store <4 x bfloat> %b, ptr %a, align 4
  ret void
}

; fptrunc <4 x float> to <4 x bfloat>. Without bf16 the expected sequence
; differs from the one in add_h: it additionally contains fcmeq/orr/bit and
; finishes with shrn. With bf16 a single bfcvtn is expected.
define <4 x bfloat> @s_to_h(<4 x float> %a) {
; CHECK-CVT-LABEL: s_to_h:
; CHECK-CVT:       // %bb.0:
; CHECK-CVT-NEXT:    movi v1.4s, #1
; CHECK-CVT-NEXT:    movi v2.4s, #127, msl #8
; CHECK-CVT-NEXT:    ushr v3.4s, v0.4s, #16
; CHECK-CVT-NEXT:    and v1.16b, v3.16b, v1.16b
; CHECK-CVT-NEXT:    add v2.4s, v0.4s, v2.4s
; CHECK-CVT-NEXT:    fcmeq v3.4s, v0.4s, v0.4s
; CHECK-CVT-NEXT:    orr v0.4s, #64, lsl #16
; CHECK-CVT-NEXT:    add v1.4s, v1.4s, v2.4s
; CHECK-CVT-NEXT:    bit v0.16b, v1.16b, v3.16b
; CHECK-CVT-NEXT:    shrn v0.4h, v0.4s, #16
; CHECK-CVT-NEXT:    ret
;
; CHECK-BF16-LABEL: s_to_h:
; CHECK-BF16:       // %bb.0:
; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
; CHECK-BF16-NEXT:    ret
  %1 = fptrunc <4 x float> %a to <4 x bfloat>
  ret <4 x bfloat> %1
}

; fptrunc <4 x double> to <4 x bfloat>. Both configurations first narrow the
; two .2d inputs into one .4s register with fcvtxn/fcvtxn2, then narrow to
; bfloat the same way as s_to_h.
define <4 x bfloat> @d_to_h(<4 x double> %a) {
; CHECK-CVT-LABEL: d_to_h:
; CHECK-CVT:       // %bb.0:
; CHECK-CVT-NEXT:    fcvtxn v0.2s, v0.2d
; CHECK-CVT-NEXT:    movi v2.4s, #127, msl #8
; CHECK-CVT-NEXT:    fcvtxn2 v0.4s, v1.2d
; CHECK-CVT-NEXT:    movi v1.4s, #1
; CHECK-CVT-NEXT:    ushr v3.4s, v0.4s, #16
; CHECK-CVT-NEXT:    add v2.4s, v0.4s, v2.4s
; CHECK-CVT-NEXT:    and v1.16b, v3.16b, v1.16b
; CHECK-CVT-NEXT:    fcmeq v3.4s, v0.4s, v0.4s
; CHECK-CVT-NEXT:    orr v0.4s, #64, lsl #16
; CHECK-CVT-NEXT:    add v1.4s, v1.4s, v2.4s
; CHECK-CVT-NEXT:    bit v0.16b, v1.16b, v3.16b
; CHECK-CVT-NEXT:    shrn v0.4h, v0.4s, #16
; CHECK-CVT-NEXT:    ret
;
; CHECK-BF16-LABEL: d_to_h:
; CHECK-BF16:       // %bb.0:
; CHECK-BF16-NEXT:    fcvtxn v0.2s, v0.2d
; CHECK-BF16-NEXT:    fcvtxn2 v0.4s, v1.2d
; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
; CHECK-BF16-NEXT:    ret
  %1 = fptrunc <4 x double> %a to <4 x bfloat>
  ret <4 x bfloat> %1
}

; fpext <4 x bfloat> to <4 x float>. Expected as a single shll #16 in both
; configurations.
define <4 x float> @h_to_s(<4 x bfloat> %a) {
; CHECK-LABEL: h_to_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    ret
  %1 = fpext <4 x bfloat> %a to <4 x float>
  ret <4 x float> %1
}

; fpext <4 x bfloat> to <4 x double>. Expected as shll #16 to .4s followed by
; fcvtl2/fcvtl into the two .2d result registers.
define <4 x double> @h_to_d(<4 x bfloat> %a) {
; CHECK-LABEL: h_to_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    fcvtl2 v1.2d, v0.4s
; CHECK-NEXT:    fcvtl v0.2d, v0.2s
; CHECK-NEXT:    ret
  %1 = fpext <4 x bfloat> %a to <4 x double>
  ret <4 x bfloat> %1
}

; bitcast <4 x i16> to <4 x bfloat>. The only expected instruction is the
; register move fmov d0, d1. The unnamed leading float parameter presumably
; exists so that %a does not already arrive in the return register.
define <4 x bfloat> @bitcast_i_to_h(float, <4 x i16> %a) {
; CHECK-LABEL: bitcast_i_to_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    ret
  %2 = bitcast <4 x i16> %a to <4 x bfloat>
  ret <4 x bfloat> %2
}

; bitcast <4 x bfloat> to <4 x i16>. Mirror of bitcast_i_to_h; again only
; fmov d0, d1 is expected.
define <4 x i16> @bitcast_h_to_i(float, <4 x bfloat> %a) {
; CHECK-LABEL: bitcast_h_to_i:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    ret
  %2 = bitcast <4 x bfloat> %a to <4 x i16>
  ret <4 x i16> %2
}

; sitofp <4 x i8> to <4 x bfloat>. The <4 x i8> value is expected in .4h
; lanes: shl #8 / sshr #8 sign-extends each lane from 8 bits, sshll widens to
; .4s, scvtf converts, and the result is narrowed as in add_h.
define <4 x bfloat> @sitofp_i8(<4 x i8> %a) #0 {
; CHECK-CVT-LABEL: sitofp_i8:
; CHECK-CVT:       // %bb.0:
; CHECK-CVT-NEXT:    shl v0.4h, v0.4h, #8
; CHECK-CVT-NEXT:    movi v1.4s, #1
; CHECK-CVT-NEXT:    sshr v0.4h, v0.4h, #8
; CHECK-CVT-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-CVT-NEXT:    scvtf v0.4s, v0.4s
; CHECK-CVT-NEXT:    ushr v2.4s, v0.4s, #16
; CHECK-CVT-NEXT:    and v1.16b, v2.16b, v1.16b
; CHECK-CVT-NEXT:    movi v2.4s, #127, msl #8
; CHECK-CVT-NEXT:    add v0.4s, v1.4s, v0.4s
; CHECK-CVT-NEXT:    addhn v0.4h, v0.4s, v2.4s
; CHECK-CVT-NEXT:    ret
;
; CHECK-BF16-LABEL: sitofp_i8:
; CHECK-BF16:       // %bb.0:
; CHECK-BF16-NEXT:    shl v0.4h, v0.4h, #8
; CHECK-BF16-NEXT:    sshr v0.4h, v0.4h, #8
; CHECK-BF16-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-BF16-NEXT:    scvtf v0.4s, v0.4s
; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
; CHECK-BF16-NEXT:    ret
  %1 = sitofp <4 x i8> %a to <4 x bfloat>
  ret <4 x bfloat> %1
}

; sitofp <4 x i16> to <4 x bfloat>. sshll widens to .4s, scvtf converts, then
; the result is narrowed as in add_h.
define <4 x bfloat> @sitofp_i16(<4 x i16> %a) #0 {
; CHECK-CVT-LABEL: sitofp_i16:
; CHECK-CVT:       // %bb.0:
; CHECK-CVT-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-CVT-NEXT:    movi v1.4s, #1
; CHECK-CVT-NEXT:    scvtf v0.4s, v0.4s
; CHECK-CVT-NEXT:    ushr v2.4s, v0.4s, #16
; CHECK-CVT-NEXT:    and v1.16b, v2.16b, v1.16b
; CHECK-CVT-NEXT:    movi v2.4s, #127, msl #8
; CHECK-CVT-NEXT:    add v0.4s, v1.4s, v0.4s
; CHECK-CVT-NEXT:    addhn v0.4h, v0.4s, v2.4s
; CHECK-CVT-NEXT:    ret
;
; CHECK-BF16-LABEL: sitofp_i16:
; CHECK-BF16:       // %bb.0:
; CHECK-BF16-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-BF16-NEXT:    scvtf v0.4s, v0.4s
; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
; CHECK-BF16-NEXT:    ret
  %1 = sitofp <4 x i16> %a to <4 x bfloat>
  ret <4 x bfloat> %1
}


; sitofp <4 x i32> to <4 x bfloat>. scvtf directly on .4s, then the result is
; narrowed as in add_h.
define <4 x bfloat> @sitofp_i32(<4 x i32> %a) #0 {
; CHECK-CVT-LABEL: sitofp_i32:
; CHECK-CVT:       // %bb.0:
; CHECK-CVT-NEXT:    scvtf v0.4s, v0.4s
; CHECK-CVT-NEXT:    movi v1.4s, #1
; CHECK-CVT-NEXT:    ushr v2.4s, v0.4s, #16
; CHECK-CVT-NEXT:    and v1.16b, v2.16b, v1.16b
; CHECK-CVT-NEXT:    movi v2.4s, #127, msl #8
; CHECK-CVT-NEXT:    add v0.4s, v1.4s, v0.4s
; CHECK-CVT-NEXT:    addhn v0.4h, v0.4s, v2.4s
; CHECK-CVT-NEXT:    ret
;
; CHECK-BF16-LABEL: sitofp_i32:
; CHECK-BF16:       // %bb.0:
; CHECK-BF16-NEXT:    scvtf v0.4s, v0.4s
; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
; CHECK-BF16-NEXT:    ret
  %1 = sitofp <4 x i32> %a to <4 x bfloat>
  ret <4 x bfloat> %1
}


; sitofp <4 x i64> to <4 x bfloat>. scvtf on both .2d halves, fcvtn/fcvtn2
; into one .4s register, then narrowed to bfloat the same way as s_to_h.
define <4 x bfloat> @sitofp_i64(<4 x i64> %a) #0 {
; CHECK-CVT-LABEL: sitofp_i64:
; CHECK-CVT:       // %bb.0:
; CHECK-CVT-NEXT:    scvtf v0.2d, v0.2d
; CHECK-CVT-NEXT:    scvtf v1.2d, v1.2d
; CHECK-CVT-NEXT:    movi v2.4s, #127, msl #8
; CHECK-CVT-NEXT:    fcvtn v0.2s, v0.2d
; CHECK-CVT-NEXT:    fcvtn2 v0.4s, v1.2d
; CHECK-CVT-NEXT:    movi v1.4s, #1
; CHECK-CVT-NEXT:    ushr v3.4s, v0.4s, #16
; CHECK-CVT-NEXT:    add v2.4s, v0.4s, v2.4s
; CHECK-CVT-NEXT:    and v1.16b, v3.16b, v1.16b
; CHECK-CVT-NEXT:    fcmeq v3.4s, v0.4s, v0.4s
; CHECK-CVT-NEXT:    orr v0.4s, #64, lsl #16
; CHECK-CVT-NEXT:    add v1.4s, v1.4s, v2.4s
; CHECK-CVT-NEXT:    bit v0.16b, v1.16b, v3.16b
; CHECK-CVT-NEXT:    shrn v0.4h, v0.4s, #16
; CHECK-CVT-NEXT:    ret
;
; CHECK-BF16-LABEL: sitofp_i64:
; CHECK-BF16:       // %bb.0:
; CHECK-BF16-NEXT:    scvtf v0.2d, v0.2d
; CHECK-BF16-NEXT:    scvtf v1.2d, v1.2d
; CHECK-BF16-NEXT:    fcvtn v0.2s, v0.2d
; CHECK-BF16-NEXT:    fcvtn2 v0.4s, v1.2d
; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
; CHECK-BF16-NEXT:    ret
  %1 = sitofp <4 x i64> %a to <4 x bfloat>
  ret <4 x bfloat> %1
}

; uitofp <4 x i8> to <4 x bfloat>. bic #255, lsl #8 clears the upper byte of
; each .4h lane, ushll widens to .4s, ucvtf converts, then the result is
; narrowed as in add_h.
define <4 x bfloat> @uitofp_i8(<4 x i8> %a) #0 {
; CHECK-CVT-LABEL: uitofp_i8:
; CHECK-CVT:       // %bb.0:
; CHECK-CVT-NEXT:    bic v0.4h, #255, lsl #8
; CHECK-CVT-NEXT:    movi v1.4s, #1
; CHECK-CVT-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-CVT-NEXT:    ucvtf v0.4s, v0.4s
; CHECK-CVT-NEXT:    ushr v2.4s, v0.4s, #16
; CHECK-CVT-NEXT:    and v1.16b, v2.16b, v1.16b
; CHECK-CVT-NEXT:    movi v2.4s, #127, msl #8
; CHECK-CVT-NEXT:    add v0.4s, v1.4s, v0.4s
; CHECK-CVT-NEXT:    addhn v0.4h, v0.4s, v2.4s
; CHECK-CVT-NEXT:    ret
;
; CHECK-BF16-LABEL: uitofp_i8:
; CHECK-BF16:       // %bb.0:
; CHECK-BF16-NEXT:    bic v0.4h, #255, lsl #8
; CHECK-BF16-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-BF16-NEXT:    ucvtf v0.4s, v0.4s
; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
; CHECK-BF16-NEXT:    ret
  %1 = uitofp <4 x i8> %a to <4 x bfloat>
  ret <4 x bfloat> %1
}


; uitofp <4 x i16> to <4 x bfloat>. ushll widens to .4s, ucvtf converts, then
; the result is narrowed as in add_h.
define <4 x bfloat> @uitofp_i16(<4 x i16> %a) #0 {
; CHECK-CVT-LABEL: uitofp_i16:
; CHECK-CVT:       // %bb.0:
; CHECK-CVT-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-CVT-NEXT:    movi v1.4s, #1
; CHECK-CVT-NEXT:    ucvtf v0.4s, v0.4s
; CHECK-CVT-NEXT:    ushr v2.4s, v0.4s, #16
; CHECK-CVT-NEXT:    and v1.16b, v2.16b, v1.16b
; CHECK-CVT-NEXT:    movi v2.4s, #127, msl #8
; CHECK-CVT-NEXT:    add v0.4s, v1.4s, v0.4s
; CHECK-CVT-NEXT:    addhn v0.4h, v0.4s, v2.4s
; CHECK-CVT-NEXT:    ret
;
; CHECK-BF16-LABEL: uitofp_i16:
; CHECK-BF16:       // %bb.0:
; CHECK-BF16-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-BF16-NEXT:    ucvtf v0.4s, v0.4s
; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
; CHECK-BF16-NEXT:    ret
  %1 = uitofp <4 x i16> %a to <4 x bfloat>
  ret <4 x bfloat> %1
}


; uitofp <4 x i32> to <4 x bfloat>. ucvtf directly on .4s, then the result is
; narrowed as in add_h.
define <4 x bfloat> @uitofp_i32(<4 x i32> %a) #0 {
; CHECK-CVT-LABEL: uitofp_i32:
; CHECK-CVT:       // %bb.0:
; CHECK-CVT-NEXT:    ucvtf v0.4s, v0.4s
; CHECK-CVT-NEXT:    movi v1.4s, #1
; CHECK-CVT-NEXT:    ushr v2.4s, v0.4s, #16
; CHECK-CVT-NEXT:    and v1.16b, v2.16b, v1.16b
; CHECK-CVT-NEXT:    movi v2.4s, #127, msl #8
; CHECK-CVT-NEXT:    add v0.4s, v1.4s, v0.4s
; CHECK-CVT-NEXT:    addhn v0.4h, v0.4s, v2.4s
; CHECK-CVT-NEXT:    ret
;
; CHECK-BF16-LABEL: uitofp_i32:
; CHECK-BF16:       // %bb.0:
; CHECK-BF16-NEXT:    ucvtf v0.4s, v0.4s
; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
; CHECK-BF16-NEXT:    ret
  %1 = uitofp <4 x i32> %a to <4 x bfloat>
  ret <4 x bfloat> %1
}


; uitofp <4 x i64> to <4 x bfloat>. ucvtf on both .2d halves, fcvtn/fcvtn2
; into one .4s register, then narrowed to bfloat the same way as s_to_h.
define <4 x bfloat> @uitofp_i64(<4 x i64> %a) #0 {
; CHECK-CVT-LABEL: uitofp_i64:
; CHECK-CVT:       // %bb.0:
; CHECK-CVT-NEXT:    ucvtf v0.2d, v0.2d
; CHECK-CVT-NEXT:    ucvtf v1.2d, v1.2d
; CHECK-CVT-NEXT:    movi v2.4s, #127, msl #8
; CHECK-CVT-NEXT:    fcvtn v0.2s, v0.2d
; CHECK-CVT-NEXT:    fcvtn2 v0.4s, v1.2d
; CHECK-CVT-NEXT:    movi v1.4s, #1
; CHECK-CVT-NEXT:    ushr v3.4s, v0.4s, #16
; CHECK-CVT-NEXT:    add v2.4s, v0.4s, v2.4s
; CHECK-CVT-NEXT:    and v1.16b, v3.16b, v1.16b
; CHECK-CVT-NEXT:    fcmeq v3.4s, v0.4s, v0.4s
; CHECK-CVT-NEXT:    orr v0.4s, #64, lsl #16
; CHECK-CVT-NEXT:    add v1.4s, v1.4s, v2.4s
; CHECK-CVT-NEXT:    bit v0.16b, v1.16b, v3.16b
; CHECK-CVT-NEXT:    shrn v0.4h, v0.4s, #16
; CHECK-CVT-NEXT:    ret
;
; CHECK-BF16-LABEL: uitofp_i64:
; CHECK-BF16:       // %bb.0:
; CHECK-BF16-NEXT:    ucvtf v0.2d, v0.2d
; CHECK-BF16-NEXT:    ucvtf v1.2d, v1.2d
; CHECK-BF16-NEXT:    fcvtn v0.2s, v0.2d
; CHECK-BF16-NEXT:    fcvtn2 v0.4s, v1.2d
; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
; CHECK-BF16-NEXT:    ret
  %1 = uitofp <4 x i64> %a to <4 x bfloat>
  ret <4 x bfloat> %1
}

; Inserts a scalar bfloat into lane 0 of an undef <4 x bfloat> and stores the
; vector. No separate insert instruction is expected: the output is only a
; register-kill annotation followed by str d0.
define void @test_insert_at_zero(bfloat %a, ptr %b) #0 {
; CHECK-LABEL: test_insert_at_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $d0
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
  %1 = insertelement <4 x bfloat> undef, bfloat %a, i64 0
  store <4 x bfloat> %1, ptr %b, align 4
  ret void
}

; fptosi <4 x bfloat> to <4 x i8>. Widen with shll #16, convert with fcvtzs in
; .4s, narrow with xtn to .4h lanes.
define <4 x i8> @fptosi_i8(<4 x bfloat> %a) #0 {
; CHECK-LABEL: fptosi_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
; CHECK-NEXT:    xtn v0.4h, v0.4s
; CHECK-NEXT:    ret
  %1 = fptosi<4 x bfloat> %a to <4 x i8>
  ret <4 x i8> %1
}

; fptosi <4 x bfloat> to <4 x i16>. Same expected sequence as fptosi_i8.
define <4 x i16> @fptosi_i16(<4 x bfloat> %a) #0 {
; CHECK-LABEL: fptosi_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
; CHECK-NEXT:    xtn v0.4h, v0.4s
; CHECK-NEXT:    ret
  %1 = fptosi<4 x bfloat> %a to <4 x i16>
  ret <4 x i16> %1
}

; fptoui <4 x bfloat> to <4 x i8>. The signed conversion fcvtzs is expected
; here even though the IR operation is unsigned; see the note after the
; assertions.
define <4 x i8> @fptoui_i8(<4 x bfloat> %a) #0 {
; CHECK-LABEL: fptoui_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
; CHECK-NEXT:    xtn v0.4h, v0.4s
; CHECK-NEXT:    ret
; NOTE: fcvtzs selected here because the xtn shaves the sign bit
  %1 = fptoui<4 x bfloat> %a to <4 x i8>
  ret <4 x i8> %1
}

; fptoui <4 x bfloat> to <4 x i16>. Unlike fptoui_i8, the unsigned conversion
; fcvtzu is expected.
define <4 x i16> @fptoui_i16(<4 x bfloat> %a) #0 {
; CHECK-LABEL: fptoui_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
; CHECK-NEXT:    xtn v0.4h, v0.4s
; CHECK-NEXT:    ret
  %1 = fptoui<4 x bfloat> %a to <4 x i16>
  ret <4 x i16> %1
}

; fcmp tests, one function per predicate. In every case both operands are
; widened with shll #16, compared in .4s, and the mask is narrowed with xtn.
; The expected output is identical in both configurations.
;
; The ordered predicates (one, oeq, ogt, oge, olt, ole, ord) use the compare
; result directly. The unordered predicates (ueq, ugt, uge, ult, ule, uno)
; expect the complementary ordered compare followed by mvn on the narrowed
; .8b value. une is the one case where mvn is expected on the .16b value
; before xtn.

; fcmp une: fcmeq, inverted with mvn before narrowing.
define <4 x i1> @test_fcmp_une(<4 x bfloat> %a, <4 x bfloat> %b) #0 {
; CHECK-LABEL: test_fcmp_une:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    fcmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    mvn v0.16b, v0.16b
; CHECK-NEXT:    xtn v0.4h, v0.4s
; CHECK-NEXT:    ret

  %1 = fcmp une <4 x bfloat> %a, %b
  ret <4 x i1> %1
}

; fcmp ueq: the same two fcmgt compares and orr as test_fcmp_one, then
; inverted with mvn.
define <4 x i1> @test_fcmp_ueq(<4 x bfloat> %a, <4 x bfloat> %b) #0 {
; CHECK-LABEL: test_fcmp_ueq:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    fcmgt v2.4s, v0.4s, v1.4s
; CHECK-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    xtn v0.4h, v0.4s
; CHECK-NEXT:    mvn v0.8b, v0.8b
; CHECK-NEXT:    ret

  %1 = fcmp ueq <4 x bfloat> %a, %b
  ret <4 x i1> %1
}

; fcmp ugt: fcmge with swapped operands (b >= a), then inverted with mvn.
define <4 x i1> @test_fcmp_ugt(<4 x bfloat> %a, <4 x bfloat> %b) #0 {
; CHECK-LABEL: test_fcmp_ugt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-NEXT:    fcmge v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    xtn v0.4h, v0.4s
; CHECK-NEXT:    mvn v0.8b, v0.8b
; CHECK-NEXT:    ret

  %1 = fcmp ugt <4 x bfloat> %a, %b
  ret <4 x i1> %1
}

; fcmp uge: fcmgt with swapped operands (b > a), then inverted with mvn.
define <4 x i1> @test_fcmp_uge(<4 x bfloat> %a, <4 x bfloat> %b) #0 {
; CHECK-LABEL: test_fcmp_uge:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    xtn v0.4h, v0.4s
; CHECK-NEXT:    mvn v0.8b, v0.8b
; CHECK-NEXT:    ret

  %1 = fcmp uge <4 x bfloat> %a, %b
  ret <4 x i1> %1
}

; fcmp ult: fcmge (a >= b), then inverted with mvn.
define <4 x i1> @test_fcmp_ult(<4 x bfloat> %a, <4 x bfloat> %b) #0 {
; CHECK-LABEL: test_fcmp_ult:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    fcmge v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    xtn v0.4h, v0.4s
; CHECK-NEXT:    mvn v0.8b, v0.8b
; CHECK-NEXT:    ret

  %1 = fcmp ult <4 x bfloat> %a, %b
  ret <4 x i1> %1
}

; fcmp ule: fcmgt (a > b), then inverted with mvn.
define <4 x i1> @test_fcmp_ule(<4 x bfloat> %a, <4 x bfloat> %b) #0 {
; CHECK-LABEL: test_fcmp_ule:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    fcmgt v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    xtn v0.4h, v0.4s
; CHECK-NEXT:    mvn v0.8b, v0.8b
; CHECK-NEXT:    ret

  %1 = fcmp ule <4 x bfloat> %a, %b
  ret <4 x i1> %1
}

; fcmp uno: the same fcmge/fcmgt/orr sequence as test_fcmp_ord, then inverted
; with mvn.
define <4 x i1> @test_fcmp_uno(<4 x bfloat> %a, <4 x bfloat> %b) #0 {
; CHECK-LABEL: test_fcmp_uno:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    fcmge v2.4s, v0.4s, v1.4s
; CHECK-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    xtn v0.4h, v0.4s
; CHECK-NEXT:    mvn v0.8b, v0.8b
; CHECK-NEXT:    ret

  %1 = fcmp uno <4 x bfloat> %a, %b
  ret <4 x i1> %1
}

; fcmp one: (a > b) orr (b > a).
define <4 x i1> @test_fcmp_one(<4 x bfloat> %a, <4 x bfloat> %b) #0 {
; CHECK-LABEL: test_fcmp_one:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    fcmgt v2.4s, v0.4s, v1.4s
; CHECK-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    xtn v0.4h, v0.4s
; CHECK-NEXT:    ret

  %1 = fcmp one <4 x bfloat> %a, %b
  ret <4 x i1> %1
}

; fcmp oeq: a single fcmeq.
define <4 x i1> @test_fcmp_oeq(<4 x bfloat> %a, <4 x bfloat> %b) #0 {
; CHECK-LABEL: test_fcmp_oeq:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    fcmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    xtn v0.4h, v0.4s
; CHECK-NEXT:    ret

  %1 = fcmp oeq <4 x bfloat> %a, %b
  ret <4 x i1> %1
}

; fcmp ogt: a single fcmgt (a > b).
define <4 x i1> @test_fcmp_ogt(<4 x bfloat> %a, <4 x bfloat> %b) #0 {
; CHECK-LABEL: test_fcmp_ogt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    fcmgt v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    xtn v0.4h, v0.4s
; CHECK-NEXT:    ret

  %1 = fcmp ogt <4 x bfloat> %a, %b
  ret <4 x i1> %1
}

; fcmp oge: a single fcmge (a >= b).
define <4 x i1> @test_fcmp_oge(<4 x bfloat> %a, <4 x bfloat> %b) #0 {
; CHECK-LABEL: test_fcmp_oge:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    fcmge v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    xtn v0.4h, v0.4s
; CHECK-NEXT:    ret

  %1 = fcmp oge <4 x bfloat> %a, %b
  ret <4 x i1> %1
}

; fcmp olt: fcmgt with swapped operands (b > a).
define <4 x i1> @test_fcmp_olt(<4 x bfloat> %a, <4 x bfloat> %b) #0 {
; CHECK-LABEL: test_fcmp_olt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    xtn v0.4h, v0.4s
; CHECK-NEXT:    ret

  %1 = fcmp olt <4 x bfloat> %a, %b
  ret <4 x i1> %1
}

; fcmp ole: fcmge with swapped operands (b >= a).
define <4 x i1> @test_fcmp_ole(<4 x bfloat> %a, <4 x bfloat> %b) #0 {
; CHECK-LABEL: test_fcmp_ole:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-NEXT:    fcmge v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    xtn v0.4h, v0.4s
; CHECK-NEXT:    ret

  %1 = fcmp ole <4 x bfloat> %a, %b
  ret <4 x i1> %1
}

; fcmp ord: (a >= b) orr (b > a).
define <4 x i1> @test_fcmp_ord(<4 x bfloat> %a, <4 x bfloat> %b) #0 {
; CHECK-LABEL: test_fcmp_ord:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shll v1.4s, v1.4h, #16
; CHECK-NEXT:    shll v0.4s, v0.4h, #16
; CHECK-NEXT:    fcmge v2.4s, v0.4s, v1.4s
; CHECK-NEXT:    fcmgt v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    xtn v0.4h, v0.4s
; CHECK-NEXT:    ret

  %1 = fcmp ord <4 x bfloat> %a, %b
  ret <4 x i1> %1
}

; Attribute group used by the functions from sitofp_i8 onwards.
attributes #0 = { nounwind }