1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 2; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16 3; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16 4; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16 5; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16 6 7; CHECK-GI: warning: Instruction selection used fallback path for stofp_i64_bf16 8; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_i64_bf16 9; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_i32_bf16 10; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_i32_bf16 11; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_i16_bf16 12; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_i16_bf16 13; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_i8_bf16 14; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_i8_bf16 15; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v2i64_v2bf16 16; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v2i64_v2bf16 17; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v3i64_v3bf16 18; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v3i64_v3bf16 19; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v4i64_v4bf16 20; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v4i64_v4bf16 21; CHECK-GI-NEXT: warning: Instruction selection used 
fallback path for stofp_v8i64_v8bf16 22; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v8i64_v8bf16 23; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v16i64_v16bf16 24; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v16i64_v16bf16 25; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v32i64_v32bf16 26; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v32i64_v32bf16 27; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v2i32_v2bf16 28; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v2i32_v2bf16 29; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v3i32_v3bf16 30; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v3i32_v3bf16 31; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v4i32_v4bf16 32; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v4i32_v4bf16 33; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v8i32_v8bf16 34; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v8i32_v8bf16 35; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v16i32_v16bf16 36; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v16i32_v16bf16 37; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v32i32_v32bf16 38; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v32i32_v32bf16 39; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v2i16_v2bf16 40; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v2i16_v2bf16 41; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v3i16_v3bf16 42; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v3i16_v3bf16 43; 
CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v4i16_v4bf16 44; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v4i16_v4bf16 45; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v8i16_v8bf16 46; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v8i16_v8bf16 47; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v16i16_v16bf16 48; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v16i16_v16bf16 49; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v32i16_v32bf16 50; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v32i16_v32bf16 51; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v2i8_v2bf16 52; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v2i8_v2bf16 53; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v3i8_v3bf16 54; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v3i8_v3bf16 55; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v4i8_v4bf16 56; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v4i8_v4bf16 57; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v8i8_v8bf16 58; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v8i8_v8bf16 59; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v16i8_v16bf16 60; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v16i8_v16bf16 61; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v32i8_v32bf16 62; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v32i8_v32bf16 63 64define bfloat @stofp_i64_bf16(i64 %a) { 65; CHECK-LABEL: stofp_i64_bf16: 66; CHECK: // %bb.0: // %entry 67; CHECK-NEXT: cmp x0, #0 68; CHECK-NEXT: and 
x11, x0, #0x8000000000000000 69; CHECK-NEXT: mov w8, #32767 // =0x7fff 70; CHECK-NEXT: cneg x9, x0, mi 71; CHECK-NEXT: lsr x10, x9, #53 72; CHECK-NEXT: cmp x10, #0 73; CHECK-NEXT: and x10, x9, #0xfffffffffffff000 74; CHECK-NEXT: csel x10, x10, x9, ne 75; CHECK-NEXT: scvtf d0, x10 76; CHECK-NEXT: cset w10, ne 77; CHECK-NEXT: tst x9, #0xfff 78; CHECK-NEXT: csel w10, wzr, w10, eq 79; CHECK-NEXT: fmov x9, d0 80; CHECK-NEXT: orr x9, x9, x11 81; CHECK-NEXT: orr x9, x9, x10 82; CHECK-NEXT: fmov d0, x9 83; CHECK-NEXT: fcvtxn s0, d0 84; CHECK-NEXT: fmov w9, s0 85; CHECK-NEXT: ubfx w10, w9, #16, #1 86; CHECK-NEXT: add w8, w9, w8 87; CHECK-NEXT: add w8, w10, w8 88; CHECK-NEXT: lsr w8, w8, #16 89; CHECK-NEXT: fmov s0, w8 90; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0 91; CHECK-NEXT: ret 92entry: 93 %c = sitofp i64 %a to bfloat 94 ret bfloat %c 95} 96 97define bfloat @utofp_i64_bf16(i64 %a) { 98; CHECK-LABEL: utofp_i64_bf16: 99; CHECK: // %bb.0: // %entry 100; CHECK-NEXT: lsr x9, x0, #53 101; CHECK-NEXT: mov w8, #32767 // =0x7fff 102; CHECK-NEXT: cmp x9, #0 103; CHECK-NEXT: and x9, x0, #0xfffffffffffff000 104; CHECK-NEXT: csel x9, x9, x0, ne 105; CHECK-NEXT: ucvtf d0, x9 106; CHECK-NEXT: cset w9, ne 107; CHECK-NEXT: tst x0, #0xfff 108; CHECK-NEXT: csel w9, wzr, w9, eq 109; CHECK-NEXT: fmov x10, d0 110; CHECK-NEXT: orr x9, x10, x9 111; CHECK-NEXT: fmov d0, x9 112; CHECK-NEXT: fcvtxn s0, d0 113; CHECK-NEXT: fmov w9, s0 114; CHECK-NEXT: ubfx w10, w9, #16, #1 115; CHECK-NEXT: add w8, w9, w8 116; CHECK-NEXT: add w8, w10, w8 117; CHECK-NEXT: lsr w8, w8, #16 118; CHECK-NEXT: fmov s0, w8 119; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0 120; CHECK-NEXT: ret 121entry: 122 %c = uitofp i64 %a to bfloat 123 ret bfloat %c 124} 125 126define bfloat @stofp_i32_bf16(i32 %a) { 127; CHECK-LABEL: stofp_i32_bf16: 128; CHECK: // %bb.0: // %entry 129; CHECK-NEXT: scvtf d0, w0 130; CHECK-NEXT: mov w8, #32767 // =0x7fff 131; CHECK-NEXT: fcvtxn s0, d0 132; CHECK-NEXT: fmov w9, s0 133; 
CHECK-NEXT: ubfx w10, w9, #16, #1 134; CHECK-NEXT: add w8, w9, w8 135; CHECK-NEXT: add w8, w10, w8 136; CHECK-NEXT: lsr w8, w8, #16 137; CHECK-NEXT: fmov s0, w8 138; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0 139; CHECK-NEXT: ret 140entry: 141 %c = sitofp i32 %a to bfloat 142 ret bfloat %c 143} 144 145define bfloat @utofp_i32_bf16(i32 %a) { 146; CHECK-LABEL: utofp_i32_bf16: 147; CHECK: // %bb.0: // %entry 148; CHECK-NEXT: ucvtf d0, w0 149; CHECK-NEXT: mov w8, #32767 // =0x7fff 150; CHECK-NEXT: fcvtxn s0, d0 151; CHECK-NEXT: fmov w9, s0 152; CHECK-NEXT: ubfx w10, w9, #16, #1 153; CHECK-NEXT: add w8, w9, w8 154; CHECK-NEXT: add w8, w10, w8 155; CHECK-NEXT: lsr w8, w8, #16 156; CHECK-NEXT: fmov s0, w8 157; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0 158; CHECK-NEXT: ret 159entry: 160 %c = uitofp i32 %a to bfloat 161 ret bfloat %c 162} 163 164define bfloat @stofp_i16_bf16(i16 %a) { 165; CHECK-LABEL: stofp_i16_bf16: 166; CHECK: // %bb.0: // %entry 167; CHECK-NEXT: sxth w9, w0 168; CHECK-NEXT: mov w8, #32767 // =0x7fff 169; CHECK-NEXT: scvtf s0, w9 170; CHECK-NEXT: fmov w9, s0 171; CHECK-NEXT: ubfx w10, w9, #16, #1 172; CHECK-NEXT: add w8, w9, w8 173; CHECK-NEXT: add w8, w10, w8 174; CHECK-NEXT: lsr w8, w8, #16 175; CHECK-NEXT: fmov s0, w8 176; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0 177; CHECK-NEXT: ret 178entry: 179 %c = sitofp i16 %a to bfloat 180 ret bfloat %c 181} 182 183define bfloat @utofp_i16_bf16(i16 %a) { 184; CHECK-LABEL: utofp_i16_bf16: 185; CHECK: // %bb.0: // %entry 186; CHECK-NEXT: and w9, w0, #0xffff 187; CHECK-NEXT: mov w8, #32767 // =0x7fff 188; CHECK-NEXT: ucvtf s0, w9 189; CHECK-NEXT: fmov w9, s0 190; CHECK-NEXT: ubfx w10, w9, #16, #1 191; CHECK-NEXT: add w8, w9, w8 192; CHECK-NEXT: add w8, w10, w8 193; CHECK-NEXT: lsr w8, w8, #16 194; CHECK-NEXT: fmov s0, w8 195; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0 196; CHECK-NEXT: ret 197entry: 198 %c = uitofp i16 %a to bfloat 199 ret bfloat %c 200} 201 202define bfloat 
@stofp_i8_bf16(i8 %a) { 203; CHECK-LABEL: stofp_i8_bf16: 204; CHECK: // %bb.0: // %entry 205; CHECK-NEXT: sxtb w9, w0 206; CHECK-NEXT: mov w8, #32767 // =0x7fff 207; CHECK-NEXT: scvtf s0, w9 208; CHECK-NEXT: fmov w9, s0 209; CHECK-NEXT: ubfx w10, w9, #16, #1 210; CHECK-NEXT: add w8, w9, w8 211; CHECK-NEXT: add w8, w10, w8 212; CHECK-NEXT: lsr w8, w8, #16 213; CHECK-NEXT: fmov s0, w8 214; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0 215; CHECK-NEXT: ret 216entry: 217 %c = sitofp i8 %a to bfloat 218 ret bfloat %c 219} 220 221define bfloat @utofp_i8_bf16(i8 %a) { 222; CHECK-LABEL: utofp_i8_bf16: 223; CHECK: // %bb.0: // %entry 224; CHECK-NEXT: and w9, w0, #0xff 225; CHECK-NEXT: mov w8, #32767 // =0x7fff 226; CHECK-NEXT: ucvtf s0, w9 227; CHECK-NEXT: fmov w9, s0 228; CHECK-NEXT: ubfx w10, w9, #16, #1 229; CHECK-NEXT: add w8, w9, w8 230; CHECK-NEXT: add w8, w10, w8 231; CHECK-NEXT: lsr w8, w8, #16 232; CHECK-NEXT: fmov s0, w8 233; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0 234; CHECK-NEXT: ret 235entry: 236 %c = uitofp i8 %a to bfloat 237 ret bfloat %c 238} 239 240define <2 x bfloat> @stofp_v2i64_v2bf16(<2 x i64> %a) { 241; CHECK-LABEL: stofp_v2i64_v2bf16: 242; CHECK: // %bb.0: // %entry 243; CHECK-NEXT: mov x9, v0.d[1] 244; CHECK-NEXT: mov w8, #32767 // =0x7fff 245; CHECK-NEXT: cmp x9, #0 246; CHECK-NEXT: cneg x10, x9, mi 247; CHECK-NEXT: and x9, x9, #0x8000000000000000 248; CHECK-NEXT: lsr x11, x10, #53 249; CHECK-NEXT: and x12, x10, #0xfffffffffffff000 250; CHECK-NEXT: cmp x11, #0 251; CHECK-NEXT: csel x11, x12, x10, ne 252; CHECK-NEXT: cset w12, ne 253; CHECK-NEXT: tst x10, #0xfff 254; CHECK-NEXT: fmov x10, d0 255; CHECK-NEXT: csel w12, wzr, w12, eq 256; CHECK-NEXT: scvtf d0, x11 257; CHECK-NEXT: cmp x10, #0 258; CHECK-NEXT: cneg x13, x10, mi 259; CHECK-NEXT: and x10, x10, #0x8000000000000000 260; CHECK-NEXT: lsr x14, x13, #53 261; CHECK-NEXT: cmp x14, #0 262; CHECK-NEXT: and x14, x13, #0xfffffffffffff000 263; CHECK-NEXT: csel x11, x14, x13, ne 264; 
CHECK-NEXT: cset w14, ne 265; CHECK-NEXT: tst x13, #0xfff 266; CHECK-NEXT: scvtf d1, x11 267; CHECK-NEXT: fmov x11, d0 268; CHECK-NEXT: orr x9, x11, x9 269; CHECK-NEXT: csel w11, wzr, w14, eq 270; CHECK-NEXT: fmov x13, d1 271; CHECK-NEXT: orr x9, x9, x12 272; CHECK-NEXT: fmov d0, x9 273; CHECK-NEXT: orr x10, x13, x10 274; CHECK-NEXT: orr x10, x10, x11 275; CHECK-NEXT: fcvtxn s0, d0 276; CHECK-NEXT: fmov d1, x10 277; CHECK-NEXT: fcvtxn s1, d1 278; CHECK-NEXT: fmov w9, s0 279; CHECK-NEXT: ubfx w11, w9, #16, #1 280; CHECK-NEXT: add w9, w9, w8 281; CHECK-NEXT: fmov w10, s1 282; CHECK-NEXT: add w9, w11, w9 283; CHECK-NEXT: lsr w9, w9, #16 284; CHECK-NEXT: ubfx w12, w10, #16, #1 285; CHECK-NEXT: add w8, w10, w8 286; CHECK-NEXT: fmov s1, w9 287; CHECK-NEXT: add w8, w12, w8 288; CHECK-NEXT: lsr w8, w8, #16 289; CHECK-NEXT: fmov s0, w8 290; CHECK-NEXT: mov v0.h[1], v1.h[0] 291; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 292; CHECK-NEXT: ret 293entry: 294 %c = sitofp <2 x i64> %a to <2 x bfloat> 295 ret <2 x bfloat> %c 296} 297 298define <2 x bfloat> @utofp_v2i64_v2bf16(<2 x i64> %a) { 299; CHECK-LABEL: utofp_v2i64_v2bf16: 300; CHECK: // %bb.0: // %entry 301; CHECK-NEXT: mov x9, v0.d[1] 302; CHECK-NEXT: fmov x11, d0 303; CHECK-NEXT: mov w8, #32767 // =0x7fff 304; CHECK-NEXT: lsr x10, x9, #53 305; CHECK-NEXT: and x12, x9, #0xfffffffffffff000 306; CHECK-NEXT: cmp x10, #0 307; CHECK-NEXT: lsr x10, x11, #53 308; CHECK-NEXT: csel x12, x12, x9, ne 309; CHECK-NEXT: cset w13, ne 310; CHECK-NEXT: tst x9, #0xfff 311; CHECK-NEXT: csel w9, wzr, w13, eq 312; CHECK-NEXT: cmp x10, #0 313; CHECK-NEXT: and x10, x11, #0xfffffffffffff000 314; CHECK-NEXT: csel x10, x10, x11, ne 315; CHECK-NEXT: ucvtf d0, x12 316; CHECK-NEXT: ucvtf d1, x10 317; CHECK-NEXT: cset w10, ne 318; CHECK-NEXT: tst x11, #0xfff 319; CHECK-NEXT: csel w10, wzr, w10, eq 320; CHECK-NEXT: fmov x11, d0 321; CHECK-NEXT: fmov x12, d1 322; CHECK-NEXT: orr x9, x11, x9 323; CHECK-NEXT: orr x10, x12, x10 324; CHECK-NEXT: fmov 
d0, x9 325; CHECK-NEXT: fmov d1, x10 326; CHECK-NEXT: fcvtxn s0, d0 327; CHECK-NEXT: fcvtxn s1, d1 328; CHECK-NEXT: fmov w9, s0 329; CHECK-NEXT: fmov w10, s1 330; CHECK-NEXT: ubfx w11, w9, #16, #1 331; CHECK-NEXT: add w9, w9, w8 332; CHECK-NEXT: ubfx w12, w10, #16, #1 333; CHECK-NEXT: add w8, w10, w8 334; CHECK-NEXT: add w9, w11, w9 335; CHECK-NEXT: add w8, w12, w8 336; CHECK-NEXT: lsr w9, w9, #16 337; CHECK-NEXT: lsr w8, w8, #16 338; CHECK-NEXT: fmov s1, w9 339; CHECK-NEXT: fmov s0, w8 340; CHECK-NEXT: mov v0.h[1], v1.h[0] 341; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 342; CHECK-NEXT: ret 343entry: 344 %c = uitofp <2 x i64> %a to <2 x bfloat> 345 ret <2 x bfloat> %c 346} 347 348define <3 x bfloat> @stofp_v3i64_v3bf16(<3 x i64> %a) { 349; CHECK-LABEL: stofp_v3i64_v3bf16: 350; CHECK: // %bb.0: // %entry 351; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 352; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 353; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 354; CHECK-NEXT: mov v0.d[1], v1.d[0] 355; CHECK-NEXT: scvtf v1.2d, v2.2d 356; CHECK-NEXT: movi v2.4s, #127, msl #8 357; CHECK-NEXT: scvtf v0.2d, v0.2d 358; CHECK-NEXT: fcvtn v0.2s, v0.2d 359; CHECK-NEXT: fcvtn2 v0.4s, v1.2d 360; CHECK-NEXT: movi v1.4s, #1 361; CHECK-NEXT: ushr v3.4s, v0.4s, #16 362; CHECK-NEXT: add v2.4s, v0.4s, v2.4s 363; CHECK-NEXT: and v1.16b, v3.16b, v1.16b 364; CHECK-NEXT: fcmeq v3.4s, v0.4s, v0.4s 365; CHECK-NEXT: orr v0.4s, #64, lsl #16 366; CHECK-NEXT: add v1.4s, v1.4s, v2.4s 367; CHECK-NEXT: bit v0.16b, v1.16b, v3.16b 368; CHECK-NEXT: shrn v0.4h, v0.4s, #16 369; CHECK-NEXT: ret 370entry: 371 %c = sitofp <3 x i64> %a to <3 x bfloat> 372 ret <3 x bfloat> %c 373} 374 375define <3 x bfloat> @utofp_v3i64_v3bf16(<3 x i64> %a) { 376; CHECK-LABEL: utofp_v3i64_v3bf16: 377; CHECK: // %bb.0: // %entry 378; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 379; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 380; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 381; CHECK-NEXT: mov 
v0.d[1], v1.d[0] 382; CHECK-NEXT: ucvtf v1.2d, v2.2d 383; CHECK-NEXT: movi v2.4s, #127, msl #8 384; CHECK-NEXT: ucvtf v0.2d, v0.2d 385; CHECK-NEXT: fcvtn v0.2s, v0.2d 386; CHECK-NEXT: fcvtn2 v0.4s, v1.2d 387; CHECK-NEXT: movi v1.4s, #1 388; CHECK-NEXT: ushr v3.4s, v0.4s, #16 389; CHECK-NEXT: add v2.4s, v0.4s, v2.4s 390; CHECK-NEXT: and v1.16b, v3.16b, v1.16b 391; CHECK-NEXT: fcmeq v3.4s, v0.4s, v0.4s 392; CHECK-NEXT: orr v0.4s, #64, lsl #16 393; CHECK-NEXT: add v1.4s, v1.4s, v2.4s 394; CHECK-NEXT: bit v0.16b, v1.16b, v3.16b 395; CHECK-NEXT: shrn v0.4h, v0.4s, #16 396; CHECK-NEXT: ret 397entry: 398 %c = uitofp <3 x i64> %a to <3 x bfloat> 399 ret <3 x bfloat> %c 400} 401 402define <4 x bfloat> @stofp_v4i64_v4bf16(<4 x i64> %a) { 403; CHECK-LABEL: stofp_v4i64_v4bf16: 404; CHECK: // %bb.0: // %entry 405; CHECK-NEXT: scvtf v0.2d, v0.2d 406; CHECK-NEXT: scvtf v1.2d, v1.2d 407; CHECK-NEXT: movi v2.4s, #127, msl #8 408; CHECK-NEXT: fcvtn v0.2s, v0.2d 409; CHECK-NEXT: fcvtn2 v0.4s, v1.2d 410; CHECK-NEXT: movi v1.4s, #1 411; CHECK-NEXT: ushr v3.4s, v0.4s, #16 412; CHECK-NEXT: add v2.4s, v0.4s, v2.4s 413; CHECK-NEXT: and v1.16b, v3.16b, v1.16b 414; CHECK-NEXT: fcmeq v3.4s, v0.4s, v0.4s 415; CHECK-NEXT: orr v0.4s, #64, lsl #16 416; CHECK-NEXT: add v1.4s, v1.4s, v2.4s 417; CHECK-NEXT: bit v0.16b, v1.16b, v3.16b 418; CHECK-NEXT: shrn v0.4h, v0.4s, #16 419; CHECK-NEXT: ret 420entry: 421 %c = sitofp <4 x i64> %a to <4 x bfloat> 422 ret <4 x bfloat> %c 423} 424 425define <4 x bfloat> @utofp_v4i64_v4bf16(<4 x i64> %a) { 426; CHECK-LABEL: utofp_v4i64_v4bf16: 427; CHECK: // %bb.0: // %entry 428; CHECK-NEXT: ucvtf v0.2d, v0.2d 429; CHECK-NEXT: ucvtf v1.2d, v1.2d 430; CHECK-NEXT: movi v2.4s, #127, msl #8 431; CHECK-NEXT: fcvtn v0.2s, v0.2d 432; CHECK-NEXT: fcvtn2 v0.4s, v1.2d 433; CHECK-NEXT: movi v1.4s, #1 434; CHECK-NEXT: ushr v3.4s, v0.4s, #16 435; CHECK-NEXT: add v2.4s, v0.4s, v2.4s 436; CHECK-NEXT: and v1.16b, v3.16b, v1.16b 437; CHECK-NEXT: fcmeq v3.4s, v0.4s, v0.4s 438; 
CHECK-NEXT: orr v0.4s, #64, lsl #16 439; CHECK-NEXT: add v1.4s, v1.4s, v2.4s 440; CHECK-NEXT: bit v0.16b, v1.16b, v3.16b 441; CHECK-NEXT: shrn v0.4h, v0.4s, #16 442; CHECK-NEXT: ret 443entry: 444 %c = uitofp <4 x i64> %a to <4 x bfloat> 445 ret <4 x bfloat> %c 446} 447 448define <8 x bfloat> @stofp_v8i64_v8bf16(<8 x i64> %a) { 449; CHECK-LABEL: stofp_v8i64_v8bf16: 450; CHECK: // %bb.0: // %entry 451; CHECK-NEXT: scvtf v2.2d, v2.2d 452; CHECK-NEXT: scvtf v0.2d, v0.2d 453; CHECK-NEXT: scvtf v3.2d, v3.2d 454; CHECK-NEXT: scvtf v1.2d, v1.2d 455; CHECK-NEXT: fcvtn v2.2s, v2.2d 456; CHECK-NEXT: fcvtn v0.2s, v0.2d 457; CHECK-NEXT: fcvtn2 v2.4s, v3.2d 458; CHECK-NEXT: fcvtn2 v0.4s, v1.2d 459; CHECK-NEXT: movi v1.4s, #1 460; CHECK-NEXT: movi v3.4s, #127, msl #8 461; CHECK-NEXT: ushr v4.4s, v2.4s, #16 462; CHECK-NEXT: ushr v5.4s, v0.4s, #16 463; CHECK-NEXT: add v6.4s, v2.4s, v3.4s 464; CHECK-NEXT: add v3.4s, v0.4s, v3.4s 465; CHECK-NEXT: and v4.16b, v4.16b, v1.16b 466; CHECK-NEXT: and v1.16b, v5.16b, v1.16b 467; CHECK-NEXT: fcmeq v5.4s, v2.4s, v2.4s 468; CHECK-NEXT: orr v2.4s, #64, lsl #16 469; CHECK-NEXT: add v4.4s, v4.4s, v6.4s 470; CHECK-NEXT: fcmeq v6.4s, v0.4s, v0.4s 471; CHECK-NEXT: add v1.4s, v1.4s, v3.4s 472; CHECK-NEXT: orr v0.4s, #64, lsl #16 473; CHECK-NEXT: bit v2.16b, v4.16b, v5.16b 474; CHECK-NEXT: bit v0.16b, v1.16b, v6.16b 475; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h 476; CHECK-NEXT: ret 477entry: 478 %c = sitofp <8 x i64> %a to <8 x bfloat> 479 ret <8 x bfloat> %c 480} 481 482define <8 x bfloat> @utofp_v8i64_v8bf16(<8 x i64> %a) { 483; CHECK-LABEL: utofp_v8i64_v8bf16: 484; CHECK: // %bb.0: // %entry 485; CHECK-NEXT: ucvtf v2.2d, v2.2d 486; CHECK-NEXT: ucvtf v0.2d, v0.2d 487; CHECK-NEXT: ucvtf v3.2d, v3.2d 488; CHECK-NEXT: ucvtf v1.2d, v1.2d 489; CHECK-NEXT: fcvtn v2.2s, v2.2d 490; CHECK-NEXT: fcvtn v0.2s, v0.2d 491; CHECK-NEXT: fcvtn2 v2.4s, v3.2d 492; CHECK-NEXT: fcvtn2 v0.4s, v1.2d 493; CHECK-NEXT: movi v1.4s, #1 494; CHECK-NEXT: movi v3.4s, #127, msl #8 495; 
CHECK-NEXT: ushr v4.4s, v2.4s, #16 496; CHECK-NEXT: ushr v5.4s, v0.4s, #16 497; CHECK-NEXT: add v6.4s, v2.4s, v3.4s 498; CHECK-NEXT: add v3.4s, v0.4s, v3.4s 499; CHECK-NEXT: and v4.16b, v4.16b, v1.16b 500; CHECK-NEXT: and v1.16b, v5.16b, v1.16b 501; CHECK-NEXT: fcmeq v5.4s, v2.4s, v2.4s 502; CHECK-NEXT: orr v2.4s, #64, lsl #16 503; CHECK-NEXT: add v4.4s, v4.4s, v6.4s 504; CHECK-NEXT: fcmeq v6.4s, v0.4s, v0.4s 505; CHECK-NEXT: add v1.4s, v1.4s, v3.4s 506; CHECK-NEXT: orr v0.4s, #64, lsl #16 507; CHECK-NEXT: bit v2.16b, v4.16b, v5.16b 508; CHECK-NEXT: bit v0.16b, v1.16b, v6.16b 509; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h 510; CHECK-NEXT: ret 511entry: 512 %c = uitofp <8 x i64> %a to <8 x bfloat> 513 ret <8 x bfloat> %c 514} 515 516define <16 x bfloat> @stofp_v16i64_v16bf16(<16 x i64> %a) { 517; CHECK-LABEL: stofp_v16i64_v16bf16: 518; CHECK: // %bb.0: // %entry 519; CHECK-NEXT: scvtf v0.2d, v0.2d 520; CHECK-NEXT: scvtf v2.2d, v2.2d 521; CHECK-NEXT: scvtf v6.2d, v6.2d 522; CHECK-NEXT: scvtf v4.2d, v4.2d 523; CHECK-NEXT: scvtf v1.2d, v1.2d 524; CHECK-NEXT: scvtf v3.2d, v3.2d 525; CHECK-NEXT: scvtf v7.2d, v7.2d 526; CHECK-NEXT: scvtf v5.2d, v5.2d 527; CHECK-NEXT: fcvtn v0.2s, v0.2d 528; CHECK-NEXT: fcvtn v2.2s, v2.2d 529; CHECK-NEXT: fcvtn v6.2s, v6.2d 530; CHECK-NEXT: fcvtn v4.2s, v4.2d 531; CHECK-NEXT: fcvtn2 v0.4s, v1.2d 532; CHECK-NEXT: fcvtn2 v2.4s, v3.2d 533; CHECK-NEXT: fcvtn2 v6.4s, v7.2d 534; CHECK-NEXT: fcvtn2 v4.4s, v5.2d 535; CHECK-NEXT: movi v1.4s, #1 536; CHECK-NEXT: movi v3.4s, #127, msl #8 537; CHECK-NEXT: ushr v7.4s, v0.4s, #16 538; CHECK-NEXT: ushr v5.4s, v2.4s, #16 539; CHECK-NEXT: ushr v16.4s, v6.4s, #16 540; CHECK-NEXT: ushr v17.4s, v4.4s, #16 541; CHECK-NEXT: add v19.4s, v0.4s, v3.4s 542; CHECK-NEXT: add v18.4s, v2.4s, v3.4s 543; CHECK-NEXT: add v20.4s, v6.4s, v3.4s 544; CHECK-NEXT: add v3.4s, v4.4s, v3.4s 545; CHECK-NEXT: and v7.16b, v7.16b, v1.16b 546; CHECK-NEXT: and v5.16b, v5.16b, v1.16b 547; CHECK-NEXT: and v16.16b, v16.16b, v1.16b 548; 
CHECK-NEXT: and v1.16b, v17.16b, v1.16b 549; CHECK-NEXT: fcmeq v17.4s, v2.4s, v2.4s 550; CHECK-NEXT: orr v2.4s, #64, lsl #16 551; CHECK-NEXT: add v7.4s, v7.4s, v19.4s 552; CHECK-NEXT: fcmeq v19.4s, v6.4s, v6.4s 553; CHECK-NEXT: add v5.4s, v5.4s, v18.4s 554; CHECK-NEXT: fcmeq v18.4s, v0.4s, v0.4s 555; CHECK-NEXT: add v1.4s, v1.4s, v3.4s 556; CHECK-NEXT: fcmeq v3.4s, v4.4s, v4.4s 557; CHECK-NEXT: add v16.4s, v16.4s, v20.4s 558; CHECK-NEXT: orr v0.4s, #64, lsl #16 559; CHECK-NEXT: orr v6.4s, #64, lsl #16 560; CHECK-NEXT: orr v4.4s, #64, lsl #16 561; CHECK-NEXT: bit v2.16b, v5.16b, v17.16b 562; CHECK-NEXT: mov v5.16b, v19.16b 563; CHECK-NEXT: bit v0.16b, v7.16b, v18.16b 564; CHECK-NEXT: bif v1.16b, v4.16b, v3.16b 565; CHECK-NEXT: bsl v5.16b, v16.16b, v6.16b 566; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h 567; CHECK-NEXT: uzp2 v1.8h, v1.8h, v5.8h 568; CHECK-NEXT: ret 569entry: 570 %c = sitofp <16 x i64> %a to <16 x bfloat> 571 ret <16 x bfloat> %c 572} 573 574define <16 x bfloat> @utofp_v16i64_v16bf16(<16 x i64> %a) { 575; CHECK-LABEL: utofp_v16i64_v16bf16: 576; CHECK: // %bb.0: // %entry 577; CHECK-NEXT: ucvtf v0.2d, v0.2d 578; CHECK-NEXT: ucvtf v2.2d, v2.2d 579; CHECK-NEXT: ucvtf v6.2d, v6.2d 580; CHECK-NEXT: ucvtf v4.2d, v4.2d 581; CHECK-NEXT: ucvtf v1.2d, v1.2d 582; CHECK-NEXT: ucvtf v3.2d, v3.2d 583; CHECK-NEXT: ucvtf v7.2d, v7.2d 584; CHECK-NEXT: ucvtf v5.2d, v5.2d 585; CHECK-NEXT: fcvtn v0.2s, v0.2d 586; CHECK-NEXT: fcvtn v2.2s, v2.2d 587; CHECK-NEXT: fcvtn v6.2s, v6.2d 588; CHECK-NEXT: fcvtn v4.2s, v4.2d 589; CHECK-NEXT: fcvtn2 v0.4s, v1.2d 590; CHECK-NEXT: fcvtn2 v2.4s, v3.2d 591; CHECK-NEXT: fcvtn2 v6.4s, v7.2d 592; CHECK-NEXT: fcvtn2 v4.4s, v5.2d 593; CHECK-NEXT: movi v1.4s, #1 594; CHECK-NEXT: movi v3.4s, #127, msl #8 595; CHECK-NEXT: ushr v7.4s, v0.4s, #16 596; CHECK-NEXT: ushr v5.4s, v2.4s, #16 597; CHECK-NEXT: ushr v16.4s, v6.4s, #16 598; CHECK-NEXT: ushr v17.4s, v4.4s, #16 599; CHECK-NEXT: add v19.4s, v0.4s, v3.4s 600; CHECK-NEXT: add v18.4s, v2.4s, v3.4s 601; 
CHECK-NEXT: add v20.4s, v6.4s, v3.4s 602; CHECK-NEXT: add v3.4s, v4.4s, v3.4s 603; CHECK-NEXT: and v7.16b, v7.16b, v1.16b 604; CHECK-NEXT: and v5.16b, v5.16b, v1.16b 605; CHECK-NEXT: and v16.16b, v16.16b, v1.16b 606; CHECK-NEXT: and v1.16b, v17.16b, v1.16b 607; CHECK-NEXT: fcmeq v17.4s, v2.4s, v2.4s 608; CHECK-NEXT: orr v2.4s, #64, lsl #16 609; CHECK-NEXT: add v7.4s, v7.4s, v19.4s 610; CHECK-NEXT: fcmeq v19.4s, v6.4s, v6.4s 611; CHECK-NEXT: add v5.4s, v5.4s, v18.4s 612; CHECK-NEXT: fcmeq v18.4s, v0.4s, v0.4s 613; CHECK-NEXT: add v1.4s, v1.4s, v3.4s 614; CHECK-NEXT: fcmeq v3.4s, v4.4s, v4.4s 615; CHECK-NEXT: add v16.4s, v16.4s, v20.4s 616; CHECK-NEXT: orr v0.4s, #64, lsl #16 617; CHECK-NEXT: orr v6.4s, #64, lsl #16 618; CHECK-NEXT: orr v4.4s, #64, lsl #16 619; CHECK-NEXT: bit v2.16b, v5.16b, v17.16b 620; CHECK-NEXT: mov v5.16b, v19.16b 621; CHECK-NEXT: bit v0.16b, v7.16b, v18.16b 622; CHECK-NEXT: bif v1.16b, v4.16b, v3.16b 623; CHECK-NEXT: bsl v5.16b, v16.16b, v6.16b 624; CHECK-NEXT: uzp2 v0.8h, v0.8h, v2.8h 625; CHECK-NEXT: uzp2 v1.8h, v1.8h, v5.8h 626; CHECK-NEXT: ret 627entry: 628 %c = uitofp <16 x i64> %a to <16 x bfloat> 629 ret <16 x bfloat> %c 630} 631 632define <32 x bfloat> @stofp_v32i64_v32bf16(<32 x i64> %a) { 633; CHECK-LABEL: stofp_v32i64_v32bf16: 634; CHECK: // %bb.0: // %entry 635; CHECK-NEXT: scvtf v17.2d, v2.2d 636; CHECK-NEXT: scvtf v18.2d, v0.2d 637; CHECK-NEXT: scvtf v19.2d, v3.2d 638; CHECK-NEXT: scvtf v3.2d, v6.2d 639; CHECK-NEXT: ldp q21, q20, [sp, #32] 640; CHECK-NEXT: scvtf v4.2d, v4.2d 641; CHECK-NEXT: scvtf v6.2d, v7.2d 642; CHECK-NEXT: scvtf v5.2d, v5.2d 643; CHECK-NEXT: ldp q24, q23, [sp, #64] 644; CHECK-NEXT: movi v16.4s, #1 645; CHECK-NEXT: fcvtn v0.2s, v17.2d 646; CHECK-NEXT: scvtf v17.2d, v1.2d 647; CHECK-NEXT: fcvtn v1.2s, v18.2d 648; CHECK-NEXT: fcvtn v3.2s, v3.2d 649; CHECK-NEXT: ldp q18, q7, [sp] 650; CHECK-NEXT: scvtf v21.2d, v21.2d 651; CHECK-NEXT: fcvtn v4.2s, v4.2d 652; CHECK-NEXT: movi v2.4s, #127, msl #8 653; CHECK-NEXT: 
scvtf v20.2d, v20.2d 654; CHECK-NEXT: fcvtn2 v0.4s, v19.2d 655; CHECK-NEXT: ldp q22, q19, [sp, #96] 656; CHECK-NEXT: fcvtn2 v1.4s, v17.2d 657; CHECK-NEXT: fcvtn2 v3.4s, v6.2d 658; CHECK-NEXT: scvtf v18.2d, v18.2d 659; CHECK-NEXT: scvtf v17.2d, v24.2d 660; CHECK-NEXT: fcvtn v6.2s, v21.2d 661; CHECK-NEXT: fcvtn2 v4.4s, v5.2d 662; CHECK-NEXT: scvtf v22.2d, v22.2d 663; CHECK-NEXT: scvtf v21.2d, v23.2d 664; CHECK-NEXT: scvtf v7.2d, v7.2d 665; CHECK-NEXT: ushr v24.4s, v0.4s, #16 666; CHECK-NEXT: add v5.4s, v0.4s, v2.4s 667; CHECK-NEXT: scvtf v19.2d, v19.2d 668; CHECK-NEXT: ushr v23.4s, v1.4s, #16 669; CHECK-NEXT: ushr v25.4s, v3.4s, #16 670; CHECK-NEXT: fcvtn v18.2s, v18.2d 671; CHECK-NEXT: fcvtn2 v6.4s, v20.2d 672; CHECK-NEXT: add v26.4s, v1.4s, v2.4s 673; CHECK-NEXT: fcvtn v17.2s, v17.2d 674; CHECK-NEXT: and v24.16b, v24.16b, v16.16b 675; CHECK-NEXT: fcvtn v22.2s, v22.2d 676; CHECK-NEXT: fcmeq v20.4s, v0.4s, v0.4s 677; CHECK-NEXT: and v23.16b, v23.16b, v16.16b 678; CHECK-NEXT: orr v0.4s, #64, lsl #16 679; CHECK-NEXT: fcmeq v27.4s, v3.4s, v3.4s 680; CHECK-NEXT: fcvtn2 v18.4s, v7.2d 681; CHECK-NEXT: add v7.4s, v3.4s, v2.4s 682; CHECK-NEXT: orr v3.4s, #64, lsl #16 683; CHECK-NEXT: add v5.4s, v24.4s, v5.4s 684; CHECK-NEXT: and v24.16b, v25.16b, v16.16b 685; CHECK-NEXT: ushr v25.4s, v4.4s, #16 686; CHECK-NEXT: fcvtn2 v22.4s, v19.2d 687; CHECK-NEXT: add v19.4s, v23.4s, v26.4s 688; CHECK-NEXT: ushr v26.4s, v6.4s, #16 689; CHECK-NEXT: fcvtn2 v17.4s, v21.2d 690; CHECK-NEXT: fcmeq v21.4s, v1.4s, v1.4s 691; CHECK-NEXT: orr v1.4s, #64, lsl #16 692; CHECK-NEXT: and v23.16b, v25.16b, v16.16b 693; CHECK-NEXT: add v25.4s, v4.4s, v2.4s 694; CHECK-NEXT: add v7.4s, v24.4s, v7.4s 695; CHECK-NEXT: ushr v24.4s, v18.4s, #16 696; CHECK-NEXT: add v30.4s, v18.4s, v2.4s 697; CHECK-NEXT: bit v0.16b, v5.16b, v20.16b 698; CHECK-NEXT: ushr v28.4s, v22.4s, #16 699; CHECK-NEXT: add v31.4s, v22.4s, v2.4s 700; CHECK-NEXT: add v23.4s, v23.4s, v25.4s 701; CHECK-NEXT: and v25.16b, v26.16b, v16.16b 702; 
CHECK-NEXT: add v26.4s, v6.4s, v2.4s 703; CHECK-NEXT: ushr v29.4s, v17.4s, #16 704; CHECK-NEXT: and v24.16b, v24.16b, v16.16b 705; CHECK-NEXT: add v2.4s, v17.4s, v2.4s 706; CHECK-NEXT: and v28.16b, v28.16b, v16.16b 707; CHECK-NEXT: bit v3.16b, v7.16b, v27.16b 708; CHECK-NEXT: bit v1.16b, v19.16b, v21.16b 709; CHECK-NEXT: add v25.4s, v25.4s, v26.4s 710; CHECK-NEXT: fcmeq v26.4s, v6.4s, v6.4s 711; CHECK-NEXT: orr v6.4s, #64, lsl #16 712; CHECK-NEXT: and v16.16b, v29.16b, v16.16b 713; CHECK-NEXT: add v24.4s, v24.4s, v30.4s 714; CHECK-NEXT: fcmeq v30.4s, v18.4s, v18.4s 715; CHECK-NEXT: add v28.4s, v28.4s, v31.4s 716; CHECK-NEXT: fcmeq v31.4s, v22.4s, v22.4s 717; CHECK-NEXT: fcmeq v29.4s, v4.4s, v4.4s 718; CHECK-NEXT: orr v4.4s, #64, lsl #16 719; CHECK-NEXT: orr v18.4s, #64, lsl #16 720; CHECK-NEXT: orr v22.4s, #64, lsl #16 721; CHECK-NEXT: mov v5.16b, v26.16b 722; CHECK-NEXT: add v2.4s, v16.4s, v2.4s 723; CHECK-NEXT: fcmeq v16.4s, v17.4s, v17.4s 724; CHECK-NEXT: orr v17.4s, #64, lsl #16 725; CHECK-NEXT: uzp2 v0.8h, v1.8h, v0.8h 726; CHECK-NEXT: mov v7.16b, v31.16b 727; CHECK-NEXT: bit v4.16b, v23.16b, v29.16b 728; CHECK-NEXT: bsl v5.16b, v25.16b, v6.16b 729; CHECK-NEXT: mov v6.16b, v30.16b 730; CHECK-NEXT: bsl v16.16b, v2.16b, v17.16b 731; CHECK-NEXT: bsl v7.16b, v28.16b, v22.16b 732; CHECK-NEXT: bsl v6.16b, v24.16b, v18.16b 733; CHECK-NEXT: uzp2 v1.8h, v4.8h, v3.8h 734; CHECK-NEXT: uzp2 v3.8h, v16.8h, v7.8h 735; CHECK-NEXT: uzp2 v2.8h, v6.8h, v5.8h 736; CHECK-NEXT: ret 737entry: 738 %c = sitofp <32 x i64> %a to <32 x bfloat> 739 ret <32 x bfloat> %c 740} 741 742define <32 x bfloat> @utofp_v32i64_v32bf16(<32 x i64> %a) { 743; CHECK-LABEL: utofp_v32i64_v32bf16: 744; CHECK: // %bb.0: // %entry 745; CHECK-NEXT: ucvtf v17.2d, v2.2d 746; CHECK-NEXT: ucvtf v18.2d, v0.2d 747; CHECK-NEXT: ucvtf v19.2d, v3.2d 748; CHECK-NEXT: ucvtf v3.2d, v6.2d 749; CHECK-NEXT: ldp q21, q20, [sp, #32] 750; CHECK-NEXT: ucvtf v4.2d, v4.2d 751; CHECK-NEXT: ucvtf v6.2d, v7.2d 752; CHECK-NEXT: ucvtf 
v5.2d, v5.2d 753; CHECK-NEXT: ldp q24, q23, [sp, #64] 754; CHECK-NEXT: movi v16.4s, #1 755; CHECK-NEXT: fcvtn v0.2s, v17.2d 756; CHECK-NEXT: ucvtf v17.2d, v1.2d 757; CHECK-NEXT: fcvtn v1.2s, v18.2d 758; CHECK-NEXT: fcvtn v3.2s, v3.2d 759; CHECK-NEXT: ldp q18, q7, [sp] 760; CHECK-NEXT: ucvtf v21.2d, v21.2d 761; CHECK-NEXT: fcvtn v4.2s, v4.2d 762; CHECK-NEXT: movi v2.4s, #127, msl #8 763; CHECK-NEXT: ucvtf v20.2d, v20.2d 764; CHECK-NEXT: fcvtn2 v0.4s, v19.2d 765; CHECK-NEXT: ldp q22, q19, [sp, #96] 766; CHECK-NEXT: fcvtn2 v1.4s, v17.2d 767; CHECK-NEXT: fcvtn2 v3.4s, v6.2d 768; CHECK-NEXT: ucvtf v18.2d, v18.2d 769; CHECK-NEXT: ucvtf v17.2d, v24.2d 770; CHECK-NEXT: fcvtn v6.2s, v21.2d 771; CHECK-NEXT: fcvtn2 v4.4s, v5.2d 772; CHECK-NEXT: ucvtf v22.2d, v22.2d 773; CHECK-NEXT: ucvtf v21.2d, v23.2d 774; CHECK-NEXT: ucvtf v7.2d, v7.2d 775; CHECK-NEXT: ushr v24.4s, v0.4s, #16 776; CHECK-NEXT: add v5.4s, v0.4s, v2.4s 777; CHECK-NEXT: ucvtf v19.2d, v19.2d 778; CHECK-NEXT: ushr v23.4s, v1.4s, #16 779; CHECK-NEXT: ushr v25.4s, v3.4s, #16 780; CHECK-NEXT: fcvtn v18.2s, v18.2d 781; CHECK-NEXT: fcvtn2 v6.4s, v20.2d 782; CHECK-NEXT: add v26.4s, v1.4s, v2.4s 783; CHECK-NEXT: fcvtn v17.2s, v17.2d 784; CHECK-NEXT: and v24.16b, v24.16b, v16.16b 785; CHECK-NEXT: fcvtn v22.2s, v22.2d 786; CHECK-NEXT: fcmeq v20.4s, v0.4s, v0.4s 787; CHECK-NEXT: and v23.16b, v23.16b, v16.16b 788; CHECK-NEXT: orr v0.4s, #64, lsl #16 789; CHECK-NEXT: fcmeq v27.4s, v3.4s, v3.4s 790; CHECK-NEXT: fcvtn2 v18.4s, v7.2d 791; CHECK-NEXT: add v7.4s, v3.4s, v2.4s 792; CHECK-NEXT: orr v3.4s, #64, lsl #16 793; CHECK-NEXT: add v5.4s, v24.4s, v5.4s 794; CHECK-NEXT: and v24.16b, v25.16b, v16.16b 795; CHECK-NEXT: ushr v25.4s, v4.4s, #16 796; CHECK-NEXT: fcvtn2 v22.4s, v19.2d 797; CHECK-NEXT: add v19.4s, v23.4s, v26.4s 798; CHECK-NEXT: ushr v26.4s, v6.4s, #16 799; CHECK-NEXT: fcvtn2 v17.4s, v21.2d 800; CHECK-NEXT: fcmeq v21.4s, v1.4s, v1.4s 801; CHECK-NEXT: orr v1.4s, #64, lsl #16 802; CHECK-NEXT: and v23.16b, v25.16b, 
v16.16b 803; CHECK-NEXT: add v25.4s, v4.4s, v2.4s 804; CHECK-NEXT: add v7.4s, v24.4s, v7.4s 805; CHECK-NEXT: ushr v24.4s, v18.4s, #16 806; CHECK-NEXT: add v30.4s, v18.4s, v2.4s 807; CHECK-NEXT: bit v0.16b, v5.16b, v20.16b 808; CHECK-NEXT: ushr v28.4s, v22.4s, #16 809; CHECK-NEXT: add v31.4s, v22.4s, v2.4s 810; CHECK-NEXT: add v23.4s, v23.4s, v25.4s 811; CHECK-NEXT: and v25.16b, v26.16b, v16.16b 812; CHECK-NEXT: add v26.4s, v6.4s, v2.4s 813; CHECK-NEXT: ushr v29.4s, v17.4s, #16 814; CHECK-NEXT: and v24.16b, v24.16b, v16.16b 815; CHECK-NEXT: add v2.4s, v17.4s, v2.4s 816; CHECK-NEXT: and v28.16b, v28.16b, v16.16b 817; CHECK-NEXT: bit v3.16b, v7.16b, v27.16b 818; CHECK-NEXT: bit v1.16b, v19.16b, v21.16b 819; CHECK-NEXT: add v25.4s, v25.4s, v26.4s 820; CHECK-NEXT: fcmeq v26.4s, v6.4s, v6.4s 821; CHECK-NEXT: orr v6.4s, #64, lsl #16 822; CHECK-NEXT: and v16.16b, v29.16b, v16.16b 823; CHECK-NEXT: add v24.4s, v24.4s, v30.4s 824; CHECK-NEXT: fcmeq v30.4s, v18.4s, v18.4s 825; CHECK-NEXT: add v28.4s, v28.4s, v31.4s 826; CHECK-NEXT: fcmeq v31.4s, v22.4s, v22.4s 827; CHECK-NEXT: fcmeq v29.4s, v4.4s, v4.4s 828; CHECK-NEXT: orr v4.4s, #64, lsl #16 829; CHECK-NEXT: orr v18.4s, #64, lsl #16 830; CHECK-NEXT: orr v22.4s, #64, lsl #16 831; CHECK-NEXT: mov v5.16b, v26.16b 832; CHECK-NEXT: add v2.4s, v16.4s, v2.4s 833; CHECK-NEXT: fcmeq v16.4s, v17.4s, v17.4s 834; CHECK-NEXT: orr v17.4s, #64, lsl #16 835; CHECK-NEXT: uzp2 v0.8h, v1.8h, v0.8h 836; CHECK-NEXT: mov v7.16b, v31.16b 837; CHECK-NEXT: bit v4.16b, v23.16b, v29.16b 838; CHECK-NEXT: bsl v5.16b, v25.16b, v6.16b 839; CHECK-NEXT: mov v6.16b, v30.16b 840; CHECK-NEXT: bsl v16.16b, v2.16b, v17.16b 841; CHECK-NEXT: bsl v7.16b, v28.16b, v22.16b 842; CHECK-NEXT: bsl v6.16b, v24.16b, v18.16b 843; CHECK-NEXT: uzp2 v1.8h, v4.8h, v3.8h 844; CHECK-NEXT: uzp2 v3.8h, v16.8h, v7.8h 845; CHECK-NEXT: uzp2 v2.8h, v6.8h, v5.8h 846; CHECK-NEXT: ret 847entry: 848 %c = uitofp <32 x i64> %a to <32 x bfloat> 849 ret <32 x bfloat> %c 850} 851 852define <2 x 
bfloat> @stofp_v2i32_v2bf16(<2 x i32> %a) { 853; CHECK-LABEL: stofp_v2i32_v2bf16: 854; CHECK: // %bb.0: // %entry 855; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 856; CHECK-NEXT: movi v1.4s, #1 857; CHECK-NEXT: scvtf v0.4s, v0.4s 858; CHECK-NEXT: ushr v2.4s, v0.4s, #16 859; CHECK-NEXT: and v1.16b, v2.16b, v1.16b 860; CHECK-NEXT: movi v2.4s, #127, msl #8 861; CHECK-NEXT: add v0.4s, v1.4s, v0.4s 862; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s 863; CHECK-NEXT: ret 864entry: 865 %c = sitofp <2 x i32> %a to <2 x bfloat> 866 ret <2 x bfloat> %c 867} 868 869define <2 x bfloat> @utofp_v2i32_v2bf16(<2 x i32> %a) { 870; CHECK-LABEL: utofp_v2i32_v2bf16: 871; CHECK: // %bb.0: // %entry 872; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 873; CHECK-NEXT: movi v1.4s, #1 874; CHECK-NEXT: ucvtf v0.4s, v0.4s 875; CHECK-NEXT: ushr v2.4s, v0.4s, #16 876; CHECK-NEXT: and v1.16b, v2.16b, v1.16b 877; CHECK-NEXT: movi v2.4s, #127, msl #8 878; CHECK-NEXT: add v0.4s, v1.4s, v0.4s 879; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s 880; CHECK-NEXT: ret 881entry: 882 %c = uitofp <2 x i32> %a to <2 x bfloat> 883 ret <2 x bfloat> %c 884} 885 886define <3 x bfloat> @stofp_v3i32_v3bf16(<3 x i32> %a) { 887; CHECK-LABEL: stofp_v3i32_v3bf16: 888; CHECK: // %bb.0: // %entry 889; CHECK-NEXT: scvtf v0.4s, v0.4s 890; CHECK-NEXT: movi v1.4s, #1 891; CHECK-NEXT: ushr v2.4s, v0.4s, #16 892; CHECK-NEXT: and v1.16b, v2.16b, v1.16b 893; CHECK-NEXT: movi v2.4s, #127, msl #8 894; CHECK-NEXT: add v0.4s, v1.4s, v0.4s 895; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s 896; CHECK-NEXT: ret 897entry: 898 %c = sitofp <3 x i32> %a to <3 x bfloat> 899 ret <3 x bfloat> %c 900} 901 902define <3 x bfloat> @utofp_v3i32_v3bf16(<3 x i32> %a) { 903; CHECK-LABEL: utofp_v3i32_v3bf16: 904; CHECK: // %bb.0: // %entry 905; CHECK-NEXT: ucvtf v0.4s, v0.4s 906; CHECK-NEXT: movi v1.4s, #1 907; CHECK-NEXT: ushr v2.4s, v0.4s, #16 908; CHECK-NEXT: and v1.16b, v2.16b, v1.16b 909; CHECK-NEXT: movi v2.4s, #127, msl #8 910; CHECK-NEXT: add v0.4s, v1.4s, 
v0.4s 911; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s 912; CHECK-NEXT: ret 913entry: 914 %c = uitofp <3 x i32> %a to <3 x bfloat> 915 ret <3 x bfloat> %c 916} 917 918define <4 x bfloat> @stofp_v4i32_v4bf16(<4 x i32> %a) { 919; CHECK-LABEL: stofp_v4i32_v4bf16: 920; CHECK: // %bb.0: // %entry 921; CHECK-NEXT: scvtf v0.4s, v0.4s 922; CHECK-NEXT: movi v1.4s, #1 923; CHECK-NEXT: ushr v2.4s, v0.4s, #16 924; CHECK-NEXT: and v1.16b, v2.16b, v1.16b 925; CHECK-NEXT: movi v2.4s, #127, msl #8 926; CHECK-NEXT: add v0.4s, v1.4s, v0.4s 927; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s 928; CHECK-NEXT: ret 929entry: 930 %c = sitofp <4 x i32> %a to <4 x bfloat> 931 ret <4 x bfloat> %c 932} 933 934define <4 x bfloat> @utofp_v4i32_v4bf16(<4 x i32> %a) { 935; CHECK-LABEL: utofp_v4i32_v4bf16: 936; CHECK: // %bb.0: // %entry 937; CHECK-NEXT: ucvtf v0.4s, v0.4s 938; CHECK-NEXT: movi v1.4s, #1 939; CHECK-NEXT: ushr v2.4s, v0.4s, #16 940; CHECK-NEXT: and v1.16b, v2.16b, v1.16b 941; CHECK-NEXT: movi v2.4s, #127, msl #8 942; CHECK-NEXT: add v0.4s, v1.4s, v0.4s 943; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s 944; CHECK-NEXT: ret 945entry: 946 %c = uitofp <4 x i32> %a to <4 x bfloat> 947 ret <4 x bfloat> %c 948} 949 950define <8 x bfloat> @stofp_v8i32_v8bf16(<8 x i32> %a) { 951; CHECK-LABEL: stofp_v8i32_v8bf16: 952; CHECK: // %bb.0: // %entry 953; CHECK-NEXT: scvtf v0.4s, v0.4s 954; CHECK-NEXT: movi v2.4s, #1 955; CHECK-NEXT: scvtf v1.4s, v1.4s 956; CHECK-NEXT: movi v5.4s, #127, msl #8 957; CHECK-NEXT: ushr v3.4s, v0.4s, #16 958; CHECK-NEXT: ushr v4.4s, v1.4s, #16 959; CHECK-NEXT: and v3.16b, v3.16b, v2.16b 960; CHECK-NEXT: and v2.16b, v4.16b, v2.16b 961; CHECK-NEXT: add v0.4s, v3.4s, v0.4s 962; CHECK-NEXT: add v1.4s, v2.4s, v1.4s 963; CHECK-NEXT: addhn v0.4h, v0.4s, v5.4s 964; CHECK-NEXT: addhn2 v0.8h, v1.4s, v5.4s 965; CHECK-NEXT: ret 966entry: 967 %c = sitofp <8 x i32> %a to <8 x bfloat> 968 ret <8 x bfloat> %c 969} 970 971define <8 x bfloat> @utofp_v8i32_v8bf16(<8 x i32> %a) { 972; CHECK-LABEL: 
utofp_v8i32_v8bf16: 973; CHECK: // %bb.0: // %entry 974; CHECK-NEXT: ucvtf v0.4s, v0.4s 975; CHECK-NEXT: movi v2.4s, #1 976; CHECK-NEXT: ucvtf v1.4s, v1.4s 977; CHECK-NEXT: movi v5.4s, #127, msl #8 978; CHECK-NEXT: ushr v3.4s, v0.4s, #16 979; CHECK-NEXT: ushr v4.4s, v1.4s, #16 980; CHECK-NEXT: and v3.16b, v3.16b, v2.16b 981; CHECK-NEXT: and v2.16b, v4.16b, v2.16b 982; CHECK-NEXT: add v0.4s, v3.4s, v0.4s 983; CHECK-NEXT: add v1.4s, v2.4s, v1.4s 984; CHECK-NEXT: addhn v0.4h, v0.4s, v5.4s 985; CHECK-NEXT: addhn2 v0.8h, v1.4s, v5.4s 986; CHECK-NEXT: ret 987entry: 988 %c = uitofp <8 x i32> %a to <8 x bfloat> 989 ret <8 x bfloat> %c 990} 991 992define <16 x bfloat> @stofp_v16i32_v16bf16(<16 x i32> %a) { 993; CHECK-LABEL: stofp_v16i32_v16bf16: 994; CHECK: // %bb.0: // %entry 995; CHECK-NEXT: scvtf v2.4s, v2.4s 996; CHECK-NEXT: scvtf v0.4s, v0.4s 997; CHECK-NEXT: scvtf v4.4s, v1.4s 998; CHECK-NEXT: movi v1.4s, #1 999; CHECK-NEXT: scvtf v3.4s, v3.4s 1000; CHECK-NEXT: movi v17.4s, #127, msl #8 1001; CHECK-NEXT: ushr v5.4s, v0.4s, #16 1002; CHECK-NEXT: ushr v6.4s, v2.4s, #16 1003; CHECK-NEXT: ushr v7.4s, v4.4s, #16 1004; CHECK-NEXT: ushr v16.4s, v3.4s, #16 1005; CHECK-NEXT: and v5.16b, v5.16b, v1.16b 1006; CHECK-NEXT: and v6.16b, v6.16b, v1.16b 1007; CHECK-NEXT: add v0.4s, v5.4s, v0.4s 1008; CHECK-NEXT: add v2.4s, v6.4s, v2.4s 1009; CHECK-NEXT: and v5.16b, v7.16b, v1.16b 1010; CHECK-NEXT: and v6.16b, v16.16b, v1.16b 1011; CHECK-NEXT: addhn v0.4h, v0.4s, v17.4s 1012; CHECK-NEXT: addhn v1.4h, v2.4s, v17.4s 1013; CHECK-NEXT: add v2.4s, v5.4s, v4.4s 1014; CHECK-NEXT: add v3.4s, v6.4s, v3.4s 1015; CHECK-NEXT: addhn2 v0.8h, v2.4s, v17.4s 1016; CHECK-NEXT: addhn2 v1.8h, v3.4s, v17.4s 1017; CHECK-NEXT: ret 1018entry: 1019 %c = sitofp <16 x i32> %a to <16 x bfloat> 1020 ret <16 x bfloat> %c 1021} 1022 1023define <16 x bfloat> @utofp_v16i32_v16bf16(<16 x i32> %a) { 1024; CHECK-LABEL: utofp_v16i32_v16bf16: 1025; CHECK: // %bb.0: // %entry 1026; CHECK-NEXT: ucvtf v2.4s, v2.4s 1027; 
CHECK-NEXT: ucvtf v0.4s, v0.4s 1028; CHECK-NEXT: ucvtf v4.4s, v1.4s 1029; CHECK-NEXT: movi v1.4s, #1 1030; CHECK-NEXT: ucvtf v3.4s, v3.4s 1031; CHECK-NEXT: movi v17.4s, #127, msl #8 1032; CHECK-NEXT: ushr v5.4s, v0.4s, #16 1033; CHECK-NEXT: ushr v6.4s, v2.4s, #16 1034; CHECK-NEXT: ushr v7.4s, v4.4s, #16 1035; CHECK-NEXT: ushr v16.4s, v3.4s, #16 1036; CHECK-NEXT: and v5.16b, v5.16b, v1.16b 1037; CHECK-NEXT: and v6.16b, v6.16b, v1.16b 1038; CHECK-NEXT: add v0.4s, v5.4s, v0.4s 1039; CHECK-NEXT: add v2.4s, v6.4s, v2.4s 1040; CHECK-NEXT: and v5.16b, v7.16b, v1.16b 1041; CHECK-NEXT: and v6.16b, v16.16b, v1.16b 1042; CHECK-NEXT: addhn v0.4h, v0.4s, v17.4s 1043; CHECK-NEXT: addhn v1.4h, v2.4s, v17.4s 1044; CHECK-NEXT: add v2.4s, v5.4s, v4.4s 1045; CHECK-NEXT: add v3.4s, v6.4s, v3.4s 1046; CHECK-NEXT: addhn2 v0.8h, v2.4s, v17.4s 1047; CHECK-NEXT: addhn2 v1.8h, v3.4s, v17.4s 1048; CHECK-NEXT: ret 1049entry: 1050 %c = uitofp <16 x i32> %a to <16 x bfloat> 1051 ret <16 x bfloat> %c 1052} 1053 1054define <32 x bfloat> @stofp_v32i32_v32bf16(<32 x i32> %a) { 1055; CHECK-LABEL: stofp_v32i32_v32bf16: 1056; CHECK: // %bb.0: // %entry 1057; CHECK-NEXT: scvtf v0.4s, v0.4s 1058; CHECK-NEXT: scvtf v2.4s, v2.4s 1059; CHECK-NEXT: scvtf v4.4s, v4.4s 1060; CHECK-NEXT: scvtf v6.4s, v6.4s 1061; CHECK-NEXT: movi v16.4s, #1 1062; CHECK-NEXT: scvtf v1.4s, v1.4s 1063; CHECK-NEXT: scvtf v17.4s, v3.4s 1064; CHECK-NEXT: scvtf v5.4s, v5.4s 1065; CHECK-NEXT: scvtf v7.4s, v7.4s 1066; CHECK-NEXT: movi v21.4s, #127, msl #8 1067; CHECK-NEXT: ushr v3.4s, v0.4s, #16 1068; CHECK-NEXT: ushr v18.4s, v2.4s, #16 1069; CHECK-NEXT: ushr v19.4s, v4.4s, #16 1070; CHECK-NEXT: ushr v20.4s, v6.4s, #16 1071; CHECK-NEXT: ushr v22.4s, v1.4s, #16 1072; CHECK-NEXT: ushr v23.4s, v17.4s, #16 1073; CHECK-NEXT: ushr v24.4s, v5.4s, #16 1074; CHECK-NEXT: ushr v25.4s, v7.4s, #16 1075; CHECK-NEXT: and v3.16b, v3.16b, v16.16b 1076; CHECK-NEXT: and v18.16b, v18.16b, v16.16b 1077; CHECK-NEXT: and v19.16b, v19.16b, v16.16b 1078; 
CHECK-NEXT: and v20.16b, v20.16b, v16.16b 1079; CHECK-NEXT: add v0.4s, v3.4s, v0.4s 1080; CHECK-NEXT: and v3.16b, v22.16b, v16.16b 1081; CHECK-NEXT: add v2.4s, v18.4s, v2.4s 1082; CHECK-NEXT: add v4.4s, v19.4s, v4.4s 1083; CHECK-NEXT: add v6.4s, v20.4s, v6.4s 1084; CHECK-NEXT: and v18.16b, v23.16b, v16.16b 1085; CHECK-NEXT: and v19.16b, v24.16b, v16.16b 1086; CHECK-NEXT: and v16.16b, v25.16b, v16.16b 1087; CHECK-NEXT: add v20.4s, v3.4s, v1.4s 1088; CHECK-NEXT: addhn v0.4h, v0.4s, v21.4s 1089; CHECK-NEXT: addhn v1.4h, v2.4s, v21.4s 1090; CHECK-NEXT: addhn v2.4h, v4.4s, v21.4s 1091; CHECK-NEXT: addhn v3.4h, v6.4s, v21.4s 1092; CHECK-NEXT: add v4.4s, v18.4s, v17.4s 1093; CHECK-NEXT: add v5.4s, v19.4s, v5.4s 1094; CHECK-NEXT: add v6.4s, v16.4s, v7.4s 1095; CHECK-NEXT: addhn2 v0.8h, v20.4s, v21.4s 1096; CHECK-NEXT: addhn2 v1.8h, v4.4s, v21.4s 1097; CHECK-NEXT: addhn2 v2.8h, v5.4s, v21.4s 1098; CHECK-NEXT: addhn2 v3.8h, v6.4s, v21.4s 1099; CHECK-NEXT: ret 1100entry: 1101 %c = sitofp <32 x i32> %a to <32 x bfloat> 1102 ret <32 x bfloat> %c 1103} 1104 1105define <32 x bfloat> @utofp_v32i32_v32bf16(<32 x i32> %a) { 1106; CHECK-LABEL: utofp_v32i32_v32bf16: 1107; CHECK: // %bb.0: // %entry 1108; CHECK-NEXT: ucvtf v0.4s, v0.4s 1109; CHECK-NEXT: ucvtf v2.4s, v2.4s 1110; CHECK-NEXT: ucvtf v4.4s, v4.4s 1111; CHECK-NEXT: ucvtf v6.4s, v6.4s 1112; CHECK-NEXT: movi v16.4s, #1 1113; CHECK-NEXT: ucvtf v1.4s, v1.4s 1114; CHECK-NEXT: ucvtf v17.4s, v3.4s 1115; CHECK-NEXT: ucvtf v5.4s, v5.4s 1116; CHECK-NEXT: ucvtf v7.4s, v7.4s 1117; CHECK-NEXT: movi v21.4s, #127, msl #8 1118; CHECK-NEXT: ushr v3.4s, v0.4s, #16 1119; CHECK-NEXT: ushr v18.4s, v2.4s, #16 1120; CHECK-NEXT: ushr v19.4s, v4.4s, #16 1121; CHECK-NEXT: ushr v20.4s, v6.4s, #16 1122; CHECK-NEXT: ushr v22.4s, v1.4s, #16 1123; CHECK-NEXT: ushr v23.4s, v17.4s, #16 1124; CHECK-NEXT: ushr v24.4s, v5.4s, #16 1125; CHECK-NEXT: ushr v25.4s, v7.4s, #16 1126; CHECK-NEXT: and v3.16b, v3.16b, v16.16b 1127; CHECK-NEXT: and v18.16b, v18.16b, 
v16.16b 1128; CHECK-NEXT: and v19.16b, v19.16b, v16.16b 1129; CHECK-NEXT: and v20.16b, v20.16b, v16.16b 1130; CHECK-NEXT: add v0.4s, v3.4s, v0.4s 1131; CHECK-NEXT: and v3.16b, v22.16b, v16.16b 1132; CHECK-NEXT: add v2.4s, v18.4s, v2.4s 1133; CHECK-NEXT: add v4.4s, v19.4s, v4.4s 1134; CHECK-NEXT: add v6.4s, v20.4s, v6.4s 1135; CHECK-NEXT: and v18.16b, v23.16b, v16.16b 1136; CHECK-NEXT: and v19.16b, v24.16b, v16.16b 1137; CHECK-NEXT: and v16.16b, v25.16b, v16.16b 1138; CHECK-NEXT: add v20.4s, v3.4s, v1.4s 1139; CHECK-NEXT: addhn v0.4h, v0.4s, v21.4s 1140; CHECK-NEXT: addhn v1.4h, v2.4s, v21.4s 1141; CHECK-NEXT: addhn v2.4h, v4.4s, v21.4s 1142; CHECK-NEXT: addhn v3.4h, v6.4s, v21.4s 1143; CHECK-NEXT: add v4.4s, v18.4s, v17.4s 1144; CHECK-NEXT: add v5.4s, v19.4s, v5.4s 1145; CHECK-NEXT: add v6.4s, v16.4s, v7.4s 1146; CHECK-NEXT: addhn2 v0.8h, v20.4s, v21.4s 1147; CHECK-NEXT: addhn2 v1.8h, v4.4s, v21.4s 1148; CHECK-NEXT: addhn2 v2.8h, v5.4s, v21.4s 1149; CHECK-NEXT: addhn2 v3.8h, v6.4s, v21.4s 1150; CHECK-NEXT: ret 1151entry: 1152 %c = uitofp <32 x i32> %a to <32 x bfloat> 1153 ret <32 x bfloat> %c 1154} 1155 1156define <2 x bfloat> @stofp_v2i16_v2bf16(<2 x i16> %a) { 1157; CHECK-LABEL: stofp_v2i16_v2bf16: 1158; CHECK: // %bb.0: // %entry 1159; CHECK-NEXT: uzp1 v0.4h, v0.4h, v0.4h 1160; CHECK-NEXT: movi v1.4s, #1 1161; CHECK-NEXT: sshll v0.4s, v0.4h, #0 1162; CHECK-NEXT: scvtf v0.4s, v0.4s 1163; CHECK-NEXT: ushr v2.4s, v0.4s, #16 1164; CHECK-NEXT: and v1.16b, v2.16b, v1.16b 1165; CHECK-NEXT: movi v2.4s, #127, msl #8 1166; CHECK-NEXT: add v0.4s, v1.4s, v0.4s 1167; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s 1168; CHECK-NEXT: ret 1169entry: 1170 %c = sitofp <2 x i16> %a to <2 x bfloat> 1171 ret <2 x bfloat> %c 1172} 1173 1174define <2 x bfloat> @utofp_v2i16_v2bf16(<2 x i16> %a) { 1175; CHECK-LABEL: utofp_v2i16_v2bf16: 1176; CHECK: // %bb.0: // %entry 1177; CHECK-NEXT: uzp1 v0.4h, v0.4h, v0.4h 1178; CHECK-NEXT: movi v1.4s, #1 1179; CHECK-NEXT: ushll v0.4s, v0.4h, #0 1180; 
CHECK-NEXT: ucvtf v0.4s, v0.4s 1181; CHECK-NEXT: ushr v2.4s, v0.4s, #16 1182; CHECK-NEXT: and v1.16b, v2.16b, v1.16b 1183; CHECK-NEXT: movi v2.4s, #127, msl #8 1184; CHECK-NEXT: add v0.4s, v1.4s, v0.4s 1185; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s 1186; CHECK-NEXT: ret 1187entry: 1188 %c = uitofp <2 x i16> %a to <2 x bfloat> 1189 ret <2 x bfloat> %c 1190} 1191 1192define <3 x bfloat> @stofp_v3i16_v3bf16(<3 x i16> %a) { 1193; CHECK-LABEL: stofp_v3i16_v3bf16: 1194; CHECK: // %bb.0: // %entry 1195; CHECK-NEXT: sshll v0.4s, v0.4h, #0 1196; CHECK-NEXT: movi v1.4s, #1 1197; CHECK-NEXT: scvtf v0.4s, v0.4s 1198; CHECK-NEXT: ushr v2.4s, v0.4s, #16 1199; CHECK-NEXT: and v1.16b, v2.16b, v1.16b 1200; CHECK-NEXT: movi v2.4s, #127, msl #8 1201; CHECK-NEXT: add v0.4s, v1.4s, v0.4s 1202; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s 1203; CHECK-NEXT: ret 1204entry: 1205 %c = sitofp <3 x i16> %a to <3 x bfloat> 1206 ret <3 x bfloat> %c 1207} 1208 1209define <3 x bfloat> @utofp_v3i16_v3bf16(<3 x i16> %a) { 1210; CHECK-LABEL: utofp_v3i16_v3bf16: 1211; CHECK: // %bb.0: // %entry 1212; CHECK-NEXT: ushll v0.4s, v0.4h, #0 1213; CHECK-NEXT: movi v1.4s, #1 1214; CHECK-NEXT: ucvtf v0.4s, v0.4s 1215; CHECK-NEXT: ushr v2.4s, v0.4s, #16 1216; CHECK-NEXT: and v1.16b, v2.16b, v1.16b 1217; CHECK-NEXT: movi v2.4s, #127, msl #8 1218; CHECK-NEXT: add v0.4s, v1.4s, v0.4s 1219; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s 1220; CHECK-NEXT: ret 1221entry: 1222 %c = uitofp <3 x i16> %a to <3 x bfloat> 1223 ret <3 x bfloat> %c 1224} 1225 1226define <4 x bfloat> @stofp_v4i16_v4bf16(<4 x i16> %a) { 1227; CHECK-LABEL: stofp_v4i16_v4bf16: 1228; CHECK: // %bb.0: // %entry 1229; CHECK-NEXT: sshll v0.4s, v0.4h, #0 1230; CHECK-NEXT: movi v1.4s, #1 1231; CHECK-NEXT: scvtf v0.4s, v0.4s 1232; CHECK-NEXT: ushr v2.4s, v0.4s, #16 1233; CHECK-NEXT: and v1.16b, v2.16b, v1.16b 1234; CHECK-NEXT: movi v2.4s, #127, msl #8 1235; CHECK-NEXT: add v0.4s, v1.4s, v0.4s 1236; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s 1237; CHECK-NEXT: ret 1238entry: 
1239 %c = sitofp <4 x i16> %a to <4 x bfloat> 1240 ret <4 x bfloat> %c 1241} 1242 1243define <4 x bfloat> @utofp_v4i16_v4bf16(<4 x i16> %a) { 1244; CHECK-LABEL: utofp_v4i16_v4bf16: 1245; CHECK: // %bb.0: // %entry 1246; CHECK-NEXT: ushll v0.4s, v0.4h, #0 1247; CHECK-NEXT: movi v1.4s, #1 1248; CHECK-NEXT: ucvtf v0.4s, v0.4s 1249; CHECK-NEXT: ushr v2.4s, v0.4s, #16 1250; CHECK-NEXT: and v1.16b, v2.16b, v1.16b 1251; CHECK-NEXT: movi v2.4s, #127, msl #8 1252; CHECK-NEXT: add v0.4s, v1.4s, v0.4s 1253; CHECK-NEXT: addhn v0.4h, v0.4s, v2.4s 1254; CHECK-NEXT: ret 1255entry: 1256 %c = uitofp <4 x i16> %a to <4 x bfloat> 1257 ret <4 x bfloat> %c 1258} 1259 1260define <8 x bfloat> @stofp_v8i16_v8bf16(<8 x i16> %a) { 1261; CHECK-LABEL: stofp_v8i16_v8bf16: 1262; CHECK: // %bb.0: // %entry 1263; CHECK-NEXT: sshll v2.4s, v0.4h, #0 1264; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0 1265; CHECK-NEXT: movi v1.4s, #1 1266; CHECK-NEXT: movi v4.4s, #127, msl #8 1267; CHECK-NEXT: scvtf v2.4s, v2.4s 1268; CHECK-NEXT: scvtf v3.4s, v0.4s 1269; CHECK-NEXT: ushr v0.4s, v2.4s, #16 1270; CHECK-NEXT: ushr v5.4s, v3.4s, #16 1271; CHECK-NEXT: and v0.16b, v0.16b, v1.16b 1272; CHECK-NEXT: and v1.16b, v5.16b, v1.16b 1273; CHECK-NEXT: add v0.4s, v0.4s, v4.4s 1274; CHECK-NEXT: add v1.4s, v1.4s, v4.4s 1275; CHECK-NEXT: addhn v0.4h, v2.4s, v0.4s 1276; CHECK-NEXT: addhn2 v0.8h, v3.4s, v1.4s 1277; CHECK-NEXT: ret 1278entry: 1279 %c = sitofp <8 x i16> %a to <8 x bfloat> 1280 ret <8 x bfloat> %c 1281} 1282 1283define <8 x bfloat> @utofp_v8i16_v8bf16(<8 x i16> %a) { 1284; CHECK-LABEL: utofp_v8i16_v8bf16: 1285; CHECK: // %bb.0: // %entry 1286; CHECK-NEXT: ushll v2.4s, v0.4h, #0 1287; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0 1288; CHECK-NEXT: movi v1.4s, #1 1289; CHECK-NEXT: movi v4.4s, #127, msl #8 1290; CHECK-NEXT: ucvtf v2.4s, v2.4s 1291; CHECK-NEXT: ucvtf v3.4s, v0.4s 1292; CHECK-NEXT: ushr v0.4s, v2.4s, #16 1293; CHECK-NEXT: ushr v5.4s, v3.4s, #16 1294; CHECK-NEXT: and v0.16b, v0.16b, v1.16b 1295; CHECK-NEXT: and 
v1.16b, v5.16b, v1.16b 1296; CHECK-NEXT: add v0.4s, v0.4s, v4.4s 1297; CHECK-NEXT: add v1.4s, v1.4s, v4.4s 1298; CHECK-NEXT: addhn v0.4h, v2.4s, v0.4s 1299; CHECK-NEXT: addhn2 v0.8h, v3.4s, v1.4s 1300; CHECK-NEXT: ret 1301entry: 1302 %c = uitofp <8 x i16> %a to <8 x bfloat> 1303 ret <8 x bfloat> %c 1304} 1305 1306define <16 x bfloat> @stofp_v16i16_v16bf16(<16 x i16> %a) { 1307; CHECK-LABEL: stofp_v16i16_v16bf16: 1308; CHECK: // %bb.0: // %entry 1309; CHECK-NEXT: sshll v3.4s, v0.4h, #0 1310; CHECK-NEXT: sshll v4.4s, v1.4h, #0 1311; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0 1312; CHECK-NEXT: sshll2 v1.4s, v1.8h, #0 1313; CHECK-NEXT: movi v2.4s, #1 1314; CHECK-NEXT: movi v7.4s, #127, msl #8 1315; CHECK-NEXT: scvtf v3.4s, v3.4s 1316; CHECK-NEXT: scvtf v4.4s, v4.4s 1317; CHECK-NEXT: scvtf v5.4s, v0.4s 1318; CHECK-NEXT: scvtf v6.4s, v1.4s 1319; CHECK-NEXT: ushr v0.4s, v3.4s, #16 1320; CHECK-NEXT: ushr v1.4s, v4.4s, #16 1321; CHECK-NEXT: ushr v16.4s, v5.4s, #16 1322; CHECK-NEXT: ushr v17.4s, v6.4s, #16 1323; CHECK-NEXT: and v0.16b, v0.16b, v2.16b 1324; CHECK-NEXT: and v1.16b, v1.16b, v2.16b 1325; CHECK-NEXT: and v16.16b, v16.16b, v2.16b 1326; CHECK-NEXT: and v2.16b, v17.16b, v2.16b 1327; CHECK-NEXT: add v0.4s, v0.4s, v7.4s 1328; CHECK-NEXT: add v1.4s, v1.4s, v7.4s 1329; CHECK-NEXT: add v2.4s, v2.4s, v7.4s 1330; CHECK-NEXT: addhn v0.4h, v3.4s, v0.4s 1331; CHECK-NEXT: addhn v1.4h, v4.4s, v1.4s 1332; CHECK-NEXT: add v3.4s, v16.4s, v7.4s 1333; CHECK-NEXT: addhn2 v0.8h, v5.4s, v3.4s 1334; CHECK-NEXT: addhn2 v1.8h, v6.4s, v2.4s 1335; CHECK-NEXT: ret 1336entry: 1337 %c = sitofp <16 x i16> %a to <16 x bfloat> 1338 ret <16 x bfloat> %c 1339} 1340 1341define <16 x bfloat> @utofp_v16i16_v16bf16(<16 x i16> %a) { 1342; CHECK-LABEL: utofp_v16i16_v16bf16: 1343; CHECK: // %bb.0: // %entry 1344; CHECK-NEXT: ushll v3.4s, v0.4h, #0 1345; CHECK-NEXT: ushll v4.4s, v1.4h, #0 1346; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0 1347; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0 1348; CHECK-NEXT: movi v2.4s, #1 1349; 
CHECK-NEXT: movi v7.4s, #127, msl #8 1350; CHECK-NEXT: ucvtf v3.4s, v3.4s 1351; CHECK-NEXT: ucvtf v4.4s, v4.4s 1352; CHECK-NEXT: ucvtf v5.4s, v0.4s 1353; CHECK-NEXT: ucvtf v6.4s, v1.4s 1354; CHECK-NEXT: ushr v0.4s, v3.4s, #16 1355; CHECK-NEXT: ushr v1.4s, v4.4s, #16 1356; CHECK-NEXT: ushr v16.4s, v5.4s, #16 1357; CHECK-NEXT: ushr v17.4s, v6.4s, #16 1358; CHECK-NEXT: and v0.16b, v0.16b, v2.16b 1359; CHECK-NEXT: and v1.16b, v1.16b, v2.16b 1360; CHECK-NEXT: and v16.16b, v16.16b, v2.16b 1361; CHECK-NEXT: and v2.16b, v17.16b, v2.16b 1362; CHECK-NEXT: add v0.4s, v0.4s, v7.4s 1363; CHECK-NEXT: add v1.4s, v1.4s, v7.4s 1364; CHECK-NEXT: add v2.4s, v2.4s, v7.4s 1365; CHECK-NEXT: addhn v0.4h, v3.4s, v0.4s 1366; CHECK-NEXT: addhn v1.4h, v4.4s, v1.4s 1367; CHECK-NEXT: add v3.4s, v16.4s, v7.4s 1368; CHECK-NEXT: addhn2 v0.8h, v5.4s, v3.4s 1369; CHECK-NEXT: addhn2 v1.8h, v6.4s, v2.4s 1370; CHECK-NEXT: ret 1371entry: 1372 %c = uitofp <16 x i16> %a to <16 x bfloat> 1373 ret <16 x bfloat> %c 1374} 1375 1376define <32 x bfloat> @stofp_v32i16_v32bf16(<32 x i16> %a) { 1377; CHECK-LABEL: stofp_v32i16_v32bf16: 1378; CHECK: // %bb.0: // %entry 1379; CHECK-NEXT: sshll v4.4s, v1.4h, #0 1380; CHECK-NEXT: sshll v5.4s, v0.4h, #0 1381; CHECK-NEXT: sshll v6.4s, v2.4h, #0 1382; CHECK-NEXT: sshll v7.4s, v3.4h, #0 1383; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0 1384; CHECK-NEXT: sshll2 v1.4s, v1.8h, #0 1385; CHECK-NEXT: sshll2 v2.4s, v2.8h, #0 1386; CHECK-NEXT: sshll2 v3.4s, v3.8h, #0 1387; CHECK-NEXT: movi v16.4s, #1 1388; CHECK-NEXT: scvtf v5.4s, v5.4s 1389; CHECK-NEXT: scvtf v4.4s, v4.4s 1390; CHECK-NEXT: scvtf v6.4s, v6.4s 1391; CHECK-NEXT: scvtf v7.4s, v7.4s 1392; CHECK-NEXT: scvtf v17.4s, v0.4s 1393; CHECK-NEXT: scvtf v18.4s, v1.4s 1394; CHECK-NEXT: scvtf v19.4s, v2.4s 1395; CHECK-NEXT: scvtf v20.4s, v3.4s 1396; CHECK-NEXT: movi v21.4s, #127, msl #8 1397; CHECK-NEXT: ushr v0.4s, v5.4s, #16 1398; CHECK-NEXT: ushr v1.4s, v4.4s, #16 1399; CHECK-NEXT: ushr v2.4s, v6.4s, #16 1400; CHECK-NEXT: ushr v3.4s, 
v7.4s, #16 1401; CHECK-NEXT: ushr v22.4s, v17.4s, #16 1402; CHECK-NEXT: ushr v23.4s, v18.4s, #16 1403; CHECK-NEXT: ushr v24.4s, v19.4s, #16 1404; CHECK-NEXT: ushr v25.4s, v20.4s, #16 1405; CHECK-NEXT: and v0.16b, v0.16b, v16.16b 1406; CHECK-NEXT: and v1.16b, v1.16b, v16.16b 1407; CHECK-NEXT: and v2.16b, v2.16b, v16.16b 1408; CHECK-NEXT: and v3.16b, v3.16b, v16.16b 1409; CHECK-NEXT: and v22.16b, v22.16b, v16.16b 1410; CHECK-NEXT: and v23.16b, v23.16b, v16.16b 1411; CHECK-NEXT: and v24.16b, v24.16b, v16.16b 1412; CHECK-NEXT: and v16.16b, v25.16b, v16.16b 1413; CHECK-NEXT: add v0.4s, v0.4s, v21.4s 1414; CHECK-NEXT: add v1.4s, v1.4s, v21.4s 1415; CHECK-NEXT: add v2.4s, v2.4s, v21.4s 1416; CHECK-NEXT: add v3.4s, v3.4s, v21.4s 1417; CHECK-NEXT: addhn v0.4h, v5.4s, v0.4s 1418; CHECK-NEXT: addhn v1.4h, v4.4s, v1.4s 1419; CHECK-NEXT: addhn v2.4h, v6.4s, v2.4s 1420; CHECK-NEXT: addhn v3.4h, v7.4s, v3.4s 1421; CHECK-NEXT: add v4.4s, v22.4s, v21.4s 1422; CHECK-NEXT: add v5.4s, v23.4s, v21.4s 1423; CHECK-NEXT: add v6.4s, v24.4s, v21.4s 1424; CHECK-NEXT: add v7.4s, v16.4s, v21.4s 1425; CHECK-NEXT: addhn2 v0.8h, v17.4s, v4.4s 1426; CHECK-NEXT: addhn2 v1.8h, v18.4s, v5.4s 1427; CHECK-NEXT: addhn2 v2.8h, v19.4s, v6.4s 1428; CHECK-NEXT: addhn2 v3.8h, v20.4s, v7.4s 1429; CHECK-NEXT: ret 1430entry: 1431 %c = sitofp <32 x i16> %a to <32 x bfloat> 1432 ret <32 x bfloat> %c 1433} 1434 1435define <32 x bfloat> @utofp_v32i16_v32bf16(<32 x i16> %a) { 1436; CHECK-LABEL: utofp_v32i16_v32bf16: 1437; CHECK: // %bb.0: // %entry 1438; CHECK-NEXT: ushll v4.4s, v1.4h, #0 1439; CHECK-NEXT: ushll v5.4s, v0.4h, #0 1440; CHECK-NEXT: ushll v6.4s, v2.4h, #0 1441; CHECK-NEXT: ushll v7.4s, v3.4h, #0 1442; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0 1443; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0 1444; CHECK-NEXT: ushll2 v2.4s, v2.8h, #0 1445; CHECK-NEXT: ushll2 v3.4s, v3.8h, #0 1446; CHECK-NEXT: movi v16.4s, #1 1447; CHECK-NEXT: ucvtf v5.4s, v5.4s 1448; CHECK-NEXT: ucvtf v4.4s, v4.4s 1449; CHECK-NEXT: ucvtf v6.4s, v6.4s 
1450; CHECK-NEXT: ucvtf v7.4s, v7.4s 1451; CHECK-NEXT: ucvtf v17.4s, v0.4s 1452; CHECK-NEXT: ucvtf v18.4s, v1.4s 1453; CHECK-NEXT: ucvtf v19.4s, v2.4s 1454; CHECK-NEXT: ucvtf v20.4s, v3.4s 1455; CHECK-NEXT: movi v21.4s, #127, msl #8 1456; CHECK-NEXT: ushr v0.4s, v5.4s, #16 1457; CHECK-NEXT: ushr v1.4s, v4.4s, #16 1458; CHECK-NEXT: ushr v2.4s, v6.4s, #16 1459; CHECK-NEXT: ushr v3.4s, v7.4s, #16 1460; CHECK-NEXT: ushr v22.4s, v17.4s, #16 1461; CHECK-NEXT: ushr v23.4s, v18.4s, #16 1462; CHECK-NEXT: ushr v24.4s, v19.4s, #16 1463; CHECK-NEXT: ushr v25.4s, v20.4s, #16 1464; CHECK-NEXT: and v0.16b, v0.16b, v16.16b 1465; CHECK-NEXT: and v1.16b, v1.16b, v16.16b 1466; CHECK-NEXT: and v2.16b, v2.16b, v16.16b 1467; CHECK-NEXT: and v3.16b, v3.16b, v16.16b 1468; CHECK-NEXT: and v22.16b, v22.16b, v16.16b 1469; CHECK-NEXT: and v23.16b, v23.16b, v16.16b 1470; CHECK-NEXT: and v24.16b, v24.16b, v16.16b 1471; CHECK-NEXT: and v16.16b, v25.16b, v16.16b 1472; CHECK-NEXT: add v0.4s, v0.4s, v21.4s 1473; CHECK-NEXT: add v1.4s, v1.4s, v21.4s 1474; CHECK-NEXT: add v2.4s, v2.4s, v21.4s 1475; CHECK-NEXT: add v3.4s, v3.4s, v21.4s 1476; CHECK-NEXT: addhn v0.4h, v5.4s, v0.4s 1477; CHECK-NEXT: addhn v1.4h, v4.4s, v1.4s 1478; CHECK-NEXT: addhn v2.4h, v6.4s, v2.4s 1479; CHECK-NEXT: addhn v3.4h, v7.4s, v3.4s 1480; CHECK-NEXT: add v4.4s, v22.4s, v21.4s 1481; CHECK-NEXT: add v5.4s, v23.4s, v21.4s 1482; CHECK-NEXT: add v6.4s, v24.4s, v21.4s 1483; CHECK-NEXT: add v7.4s, v16.4s, v21.4s 1484; CHECK-NEXT: addhn2 v0.8h, v17.4s, v4.4s 1485; CHECK-NEXT: addhn2 v1.8h, v18.4s, v5.4s 1486; CHECK-NEXT: addhn2 v2.8h, v19.4s, v6.4s 1487; CHECK-NEXT: addhn2 v3.8h, v20.4s, v7.4s 1488; CHECK-NEXT: ret 1489entry: 1490 %c = uitofp <32 x i16> %a to <32 x bfloat> 1491 ret <32 x bfloat> %c 1492} 1493 1494define <2 x bfloat> @stofp_v2i8_v2bf16(<2 x i8> %a) { 1495; CHECK-LABEL: stofp_v2i8_v2bf16: 1496; CHECK: // %bb.0: // %entry 1497; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1498; CHECK-NEXT: mov w9, v0.s[1] 1499; 
CHECK-NEXT:    fmov w10, s0
; CHECK-NEXT:    mov w8, #32767 // =0x7fff
; CHECK-NEXT:    sxtb w10, w10
; CHECK-NEXT:    sxtb w9, w9
; CHECK-NEXT:    scvtf s1, w10
; CHECK-NEXT:    scvtf s0, w9
; CHECK-NEXT:    fmov w10, s1
; CHECK-NEXT:    fmov w9, s0
; CHECK-NEXT:    ubfx w12, w10, #16, #1
; CHECK-NEXT:    ubfx w11, w9, #16, #1
; CHECK-NEXT:    add w9, w9, w8
; CHECK-NEXT:    add w8, w10, w8
; CHECK-NEXT:    add w8, w12, w8
; CHECK-NEXT:    add w9, w11, w9
; CHECK-NEXT:    lsr w8, w8, #16
; CHECK-NEXT:    lsr w9, w9, #16
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    fmov s1, w9
; CHECK-NEXT:    mov v0.h[1], v1.h[0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
entry:
  %c = sitofp <2 x i8> %a to <2 x bfloat>
  ret <2 x bfloat> %c
}

; Unsigned conversion of a <2 x i8> vector to <2 x bfloat> (uitofp).
; The expected assembly is matched under the prefix shared by every RUN line.
; In that expected output each lane is masked to 8 bits and converted with a
; scalar ucvtf, then 0x7fff plus bit 16 is added and the upper 16 bits are kept.
define <2 x bfloat> @utofp_v2i8_v2bf16(<2 x i8> %a) {
; CHECK-LABEL: utofp_v2i8_v2bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov w9, v0.s[1]
; CHECK-NEXT:    fmov w10, s0
; CHECK-NEXT:    mov w8, #32767 // =0x7fff
; CHECK-NEXT:    and w10, w10, #0xff
; CHECK-NEXT:    and w9, w9, #0xff
; CHECK-NEXT:    ucvtf s1, w10
; CHECK-NEXT:    ucvtf s0, w9
; CHECK-NEXT:    fmov w10, s1
; CHECK-NEXT:    fmov w9, s0
; CHECK-NEXT:    ubfx w12, w10, #16, #1
; CHECK-NEXT:    ubfx w11, w9, #16, #1
; CHECK-NEXT:    add w9, w9, w8
; CHECK-NEXT:    add w8, w10, w8
; CHECK-NEXT:    add w8, w12, w8
; CHECK-NEXT:    add w9, w11, w9
; CHECK-NEXT:    lsr w8, w8, #16
; CHECK-NEXT:    lsr w9, w9, #16
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    fmov s1, w9
; CHECK-NEXT:    mov v0.h[1], v1.h[0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
entry:
  %c = uitofp <2 x i8> %a to <2 x bfloat>
  ret <2 x bfloat> %c
}

; Signed conversion of a <3 x i8> vector to <3 x bfloat> (sitofp).
; The expected output assembles the three scalar arguments (w0, w1, w2) into
; one vector register, sign-extends via shl/sshr by 8, and uses a vector scvtf.
define <3 x bfloat> @stofp_v3i8_v3bf16(<3 x i8> %a) {
; CHECK-LABEL: stofp_v3i8_v3bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmov s0, w0
; CHECK-NEXT:    movi v1.4s, #1
; CHECK-NEXT:    mov v0.h[1], w1
; CHECK-NEXT:    mov v0.h[2], w2
; CHECK-NEXT:    shl v0.4h, v0.4h, #8
; CHECK-NEXT:    sshr v0.4h, v0.4h, #8
; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-NEXT:    scvtf v0.4s, v0.4s
; CHECK-NEXT:    ushr v2.4s, v0.4s, #16
; CHECK-NEXT:    and v1.16b, v2.16b, v1.16b
; CHECK-NEXT:    movi v2.4s, #127, msl #8
; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    addhn v0.4h, v0.4s, v2.4s
; CHECK-NEXT:    ret
entry:
  %c = sitofp <3 x i8> %a to <3 x bfloat>
  ret <3 x bfloat> %c
}

; Unsigned conversion of a <3 x i8> vector to <3 x bfloat> (uitofp).
; Same shape as the signed <3 x i8> case, but the expected output clears the
; upper byte of each halfword with bic and uses ushll/ucvtf.
define <3 x bfloat> @utofp_v3i8_v3bf16(<3 x i8> %a) {
; CHECK-LABEL: utofp_v3i8_v3bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmov s0, w0
; CHECK-NEXT:    movi v1.4s, #1
; CHECK-NEXT:    mov v0.h[1], w1
; CHECK-NEXT:    mov v0.h[2], w2
; CHECK-NEXT:    bic v0.4h, #255, lsl #8
; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-NEXT:    ucvtf v0.4s, v0.4s
; CHECK-NEXT:    ushr v2.4s, v0.4s, #16
; CHECK-NEXT:    and v1.16b, v2.16b, v1.16b
; CHECK-NEXT:    movi v2.4s, #127, msl #8
; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    addhn v0.4h, v0.4s, v2.4s
; CHECK-NEXT:    ret
entry:
  %c = uitofp <3 x i8> %a to <3 x bfloat>
  ret <3 x bfloat> %c
}

; Signed conversion of a <4 x i8> vector to <4 x bfloat> (sitofp).
; The expected output sign-extends in-register (shl/sshr by 8, then sshll)
; before a single vector scvtf.
define <4 x bfloat> @stofp_v4i8_v4bf16(<4 x i8> %a) {
; CHECK-LABEL: stofp_v4i8_v4bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    shl v0.4h, v0.4h, #8
; CHECK-NEXT:    movi v1.4s, #1
; CHECK-NEXT:    sshr v0.4h, v0.4h, #8
; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-NEXT:    scvtf v0.4s, v0.4s
; CHECK-NEXT:    ushr v2.4s, v0.4s, #16
; CHECK-NEXT:    and v1.16b, v2.16b, v1.16b
; CHECK-NEXT:    movi v2.4s, #127, msl #8
; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    addhn v0.4h, v0.4s, v2.4s
; CHECK-NEXT:    ret
entry:
  %c = sitofp <4 x i8> %a to <4 x bfloat>
  ret <4 x bfloat> %c
}

; Unsigned conversion of a <4 x i8> vector to <4 x bfloat> (uitofp).
; The expected output masks each halfword with bic, widens with ushll, and
; converts with a single vector ucvtf.
define <4 x bfloat> @utofp_v4i8_v4bf16(<4 x i8> %a) {
; CHECK-LABEL: utofp_v4i8_v4bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    bic v0.4h, #255, lsl #8
; CHECK-NEXT:    movi v1.4s, #1
; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-NEXT:    ucvtf v0.4s, v0.4s
; CHECK-NEXT:    ushr v2.4s, v0.4s, #16
; CHECK-NEXT:    and v1.16b, v2.16b, v1.16b
; CHECK-NEXT:    movi v2.4s, #127, msl #8
; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    addhn v0.4h, v0.4s, v2.4s
; CHECK-NEXT:    ret
entry:
  %c = uitofp <4 x i8> %a to <4 x bfloat>
  ret <4 x bfloat> %c
}

; Signed conversion of a <8 x i8> vector to <8 x bfloat> (sitofp).
; The expected output widens to two 4s halves (sshll/sshll2), converts each
; with scvtf, and narrows back with an addhn/addhn2 pair.
define <8 x bfloat> @stofp_v8i8_v8bf16(<8 x i8> %a) {
; CHECK-LABEL: stofp_v8i8_v8bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-NEXT:    movi v1.4s, #1
; CHECK-NEXT:    movi v4.4s, #127, msl #8
; CHECK-NEXT:    sshll v2.4s, v0.4h, #0
; CHECK-NEXT:    sshll2 v0.4s, v0.8h, #0
; CHECK-NEXT:    scvtf v2.4s, v2.4s
; CHECK-NEXT:    scvtf v3.4s, v0.4s
; CHECK-NEXT:    ushr v0.4s, v2.4s, #16
; CHECK-NEXT:    ushr v5.4s, v3.4s, #16
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    and v1.16b, v5.16b, v1.16b
; CHECK-NEXT:    add v0.4s, v0.4s, v4.4s
; CHECK-NEXT:    add v1.4s, v1.4s, v4.4s
; CHECK-NEXT:    addhn v0.4h, v2.4s, v0.4s
; CHECK-NEXT:    addhn2 v0.8h, v3.4s, v1.4s
; CHECK-NEXT:    ret
entry:
  %c = sitofp <8 x i8> %a to <8 x bfloat>
  ret <8 x bfloat> %c
}

; Unsigned conversion of a <8 x i8> vector to <8 x bfloat> (uitofp).
; Mirrors the signed <8 x i8> case with ushll/ushll2 and ucvtf.
define <8 x bfloat> @utofp_v8i8_v8bf16(<8 x i8> %a) {
; CHECK-LABEL: utofp_v8i8_v8bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    movi v1.4s, #1
; CHECK-NEXT:    movi v4.4s, #127, msl #8
; CHECK-NEXT:    ushll v2.4s, v0.4h, #0
; CHECK-NEXT:    ushll2 v0.4s, v0.8h, #0
; CHECK-NEXT:    ucvtf v2.4s, v2.4s
; CHECK-NEXT:    ucvtf v3.4s, v0.4s
; CHECK-NEXT:    ushr v0.4s, v2.4s, #16
; CHECK-NEXT:    ushr v5.4s, v3.4s, #16
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    and v1.16b, v5.16b, v1.16b
; CHECK-NEXT:    add v0.4s, v0.4s, v4.4s
; CHECK-NEXT:    add v1.4s, v1.4s, v4.4s
; CHECK-NEXT:    addhn v0.4h, v2.4s, v0.4s
; CHECK-NEXT:    addhn2 v0.8h, v3.4s, v1.4s
; CHECK-NEXT:    ret
entry:
  %c = uitofp <8 x i8> %a to <8 x bfloat>
  ret <8 x bfloat> %c
}

; Signed conversion of a <16 x i8> vector to <16 x bfloat> (sitofp).
; The expected output splits the input into four 4s groups, converts each with
; scvtf, and returns the result in v0 and v1.
define <16 x bfloat> @stofp_v16i8_v16bf16(<16 x i8> %a) {
; CHECK-LABEL: stofp_v16i8_v16bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sshll2 v2.8h, v0.16b, #0
; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-NEXT:    movi v1.4s, #1
; CHECK-NEXT:    movi v7.4s, #127, msl #8
; CHECK-NEXT:    sshll v3.4s, v2.4h, #0
; CHECK-NEXT:    sshll v4.4s, v0.4h, #0
; CHECK-NEXT:    sshll2 v2.4s, v2.8h, #0
; CHECK-NEXT:    sshll2 v0.4s, v0.8h, #0
; CHECK-NEXT:    scvtf v3.4s, v3.4s
; CHECK-NEXT:    scvtf v4.4s, v4.4s
; CHECK-NEXT:    scvtf v2.4s, v2.4s
; CHECK-NEXT:    scvtf v6.4s, v0.4s
; CHECK-NEXT:    ushr v5.4s, v3.4s, #16
; CHECK-NEXT:    ushr v0.4s, v4.4s, #16
; CHECK-NEXT:    ushr v16.4s, v2.4s, #16
; CHECK-NEXT:    ushr v17.4s, v6.4s, #16
; CHECK-NEXT:    and v5.16b, v5.16b, v1.16b
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    and v16.16b, v16.16b, v1.16b
; CHECK-NEXT:    and v17.16b, v17.16b, v1.16b
; CHECK-NEXT:    add v5.4s, v5.4s, v7.4s
; CHECK-NEXT:    add v0.4s, v0.4s, v7.4s
; CHECK-NEXT:    addhn v1.4h, v3.4s, v5.4s
; CHECK-NEXT:    addhn v0.4h, v4.4s, v0.4s
; CHECK-NEXT:    add v3.4s, v16.4s, v7.4s
; CHECK-NEXT:    add v4.4s, v17.4s, v7.4s
; CHECK-NEXT:    addhn2 v1.8h, v2.4s, v3.4s
; CHECK-NEXT:    addhn2 v0.8h, v6.4s, v4.4s
; CHECK-NEXT:    ret
entry:
  %c = sitofp <16 x i8> %a to <16 x bfloat>
  ret <16 x bfloat> %c
}

; Unsigned conversion of a <16 x i8> vector to <16 x bfloat> (uitofp).
; Mirrors the signed <16 x i8> case with ushll/ushll2 and ucvtf.
define <16 x bfloat> @utofp_v16i8_v16bf16(<16 x i8> %a) {
; CHECK-LABEL: utofp_v16i8_v16bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ushll2 v2.8h, v0.16b, #0
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    movi v1.4s, #1
; CHECK-NEXT:    movi v7.4s, #127, msl #8
; CHECK-NEXT:    ushll v3.4s, v2.4h, #0
; CHECK-NEXT:    ushll v4.4s, v0.4h, #0
; CHECK-NEXT:    ushll2 v2.4s, v2.8h, #0
; CHECK-NEXT:    ushll2 v0.4s, v0.8h, #0
; CHECK-NEXT:    ucvtf v3.4s, v3.4s
; CHECK-NEXT:    ucvtf v4.4s, v4.4s
; CHECK-NEXT:    ucvtf v2.4s, v2.4s
; CHECK-NEXT:    ucvtf v6.4s, v0.4s
; CHECK-NEXT:    ushr v5.4s, v3.4s, #16
; CHECK-NEXT:    ushr v0.4s, v4.4s, #16
; CHECK-NEXT:    ushr v16.4s, v2.4s, #16
; CHECK-NEXT:    ushr v17.4s, v6.4s, #16
; CHECK-NEXT:    and v5.16b, v5.16b, v1.16b
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    and v16.16b, v16.16b, v1.16b
; CHECK-NEXT:    and v17.16b, v17.16b, v1.16b
; CHECK-NEXT:    add v5.4s, v5.4s, v7.4s
; CHECK-NEXT:    add v0.4s, v0.4s, v7.4s
; CHECK-NEXT:    addhn v1.4h, v3.4s, v5.4s
; CHECK-NEXT:    addhn v0.4h, v4.4s, v0.4s
; CHECK-NEXT:    add v3.4s, v16.4s, v7.4s
; CHECK-NEXT:    add v4.4s, v17.4s, v7.4s
; CHECK-NEXT:    addhn2 v1.8h, v2.4s, v3.4s
; CHECK-NEXT:    addhn2 v0.8h, v6.4s, v4.4s
; CHECK-NEXT:    ret
entry:
  %c = uitofp <16 x i8> %a to <16 x bfloat>
  ret <16 x bfloat> %c
}

; Signed conversion of a <32 x i8> vector to <32 x bfloat> (sitofp).
; The expected output reads the input from v0 and v1, converts eight 4s groups
; with scvtf, and returns the result in v0 through v3.
define <32 x bfloat> @stofp_v32i8_v32bf16(<32 x i8> %a) {
; CHECK-LABEL: stofp_v32i8_v32bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sshll2 v3.8h, v0.16b, #0
; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-NEXT:    sshll2 v4.8h, v1.16b, #0
; CHECK-NEXT:    sshll v1.8h, v1.8b, #0
; CHECK-NEXT:    movi v2.4s, #1
; CHECK-NEXT:    movi v21.4s, #127, msl #8
; CHECK-NEXT:    sshll v5.4s, v3.4h, #0
; CHECK-NEXT:    sshll v6.4s, v0.4h, #0
; CHECK-NEXT:    sshll v7.4s, v4.4h, #0
; CHECK-NEXT:    sshll v16.4s, v1.4h, #0
; CHECK-NEXT:    sshll2 v3.4s, v3.8h, #0
; CHECK-NEXT:    sshll2 v4.4s, v4.8h, #0
; CHECK-NEXT:    sshll2 v0.4s, v0.8h, #0
; CHECK-NEXT:    sshll2 v1.4s, v1.8h, #0
; CHECK-NEXT:    scvtf v5.4s, v5.4s
; CHECK-NEXT:    scvtf v6.4s, v6.4s
; CHECK-NEXT:    scvtf v7.4s, v7.4s
; CHECK-NEXT:    scvtf v16.4s, v16.4s
; CHECK-NEXT:    scvtf v17.4s, v3.4s
; CHECK-NEXT:    scvtf v4.4s, v4.4s
; CHECK-NEXT:    scvtf v18.4s, v0.4s
; CHECK-NEXT:    scvtf v19.4s, v1.4s
; CHECK-NEXT:    ushr v0.4s, v5.4s, #16
; CHECK-NEXT:    ushr v3.4s, v6.4s, #16
; CHECK-NEXT:    ushr v1.4s, v7.4s, #16
; CHECK-NEXT:    ushr v20.4s, v16.4s, #16
; CHECK-NEXT:    ushr v23.4s, v17.4s, #16
; CHECK-NEXT:    ushr v24.4s, v4.4s, #16
; CHECK-NEXT:    ushr v22.4s, v18.4s, #16
; CHECK-NEXT:    ushr v25.4s, v19.4s, #16
; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    and v3.16b, v3.16b, v2.16b
; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
; CHECK-NEXT:    and v20.16b, v20.16b, v2.16b
; CHECK-NEXT:    and v23.16b, v23.16b, v2.16b
; CHECK-NEXT:    and v24.16b, v24.16b, v2.16b
; CHECK-NEXT:    and v22.16b, v22.16b, v2.16b
; CHECK-NEXT:    and v25.16b, v25.16b, v2.16b
; CHECK-NEXT:    add v0.4s, v0.4s, v21.4s
; CHECK-NEXT:    add v3.4s, v3.4s, v21.4s
; CHECK-NEXT:    add v26.4s, v1.4s, v21.4s
; CHECK-NEXT:    add v20.4s, v20.4s, v21.4s
; CHECK-NEXT:    addhn v1.4h, v5.4s, v0.4s
; CHECK-NEXT:    addhn v0.4h, v6.4s, v3.4s
; CHECK-NEXT:    addhn v3.4h, v7.4s, v26.4s
; CHECK-NEXT:    addhn v2.4h, v16.4s, v20.4s
; CHECK-NEXT:    add v5.4s, v22.4s, v21.4s
; CHECK-NEXT:    add v6.4s, v23.4s, v21.4s
; CHECK-NEXT:    add v7.4s, v24.4s, v21.4s
; CHECK-NEXT:    add v16.4s, v25.4s, v21.4s
; CHECK-NEXT:    addhn2 v0.8h, v18.4s, v5.4s
; CHECK-NEXT:    addhn2 v1.8h, v17.4s, v6.4s
; CHECK-NEXT:    addhn2 v3.8h, v4.4s, v7.4s
; CHECK-NEXT:    addhn2 v2.8h, v19.4s, v16.4s
; CHECK-NEXT:    ret
entry:
  %c = sitofp <32 x i8> %a to <32 x bfloat>
  ret <32 x bfloat> %c
}

; Unsigned conversion of a <32 x i8> vector to <32 x bfloat> (uitofp).
; Mirrors the signed <32 x i8> case with ushll/ushll2 and ucvtf.
define <32 x bfloat> @utofp_v32i8_v32bf16(<32 x i8> %a) {
; CHECK-LABEL: utofp_v32i8_v32bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ushll2 v3.8h, v0.16b, #0
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    ushll2 v4.8h, v1.16b, #0
; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
; CHECK-NEXT:    movi v2.4s, #1
; CHECK-NEXT:    movi v21.4s, #127, msl #8
; CHECK-NEXT:    ushll v5.4s, v3.4h, #0
; CHECK-NEXT:    ushll v6.4s, v0.4h, #0
; CHECK-NEXT:    ushll v7.4s, v4.4h, #0
; CHECK-NEXT:    ushll v16.4s, v1.4h, #0
; CHECK-NEXT:    ushll2 v3.4s, v3.8h, #0
; CHECK-NEXT:    ushll2 v4.4s, v4.8h, #0
; CHECK-NEXT:    ushll2 v0.4s, v0.8h, #0
; CHECK-NEXT:    ushll2 v1.4s, v1.8h, #0
; CHECK-NEXT:    ucvtf v5.4s, v5.4s
; CHECK-NEXT:    ucvtf v6.4s, v6.4s
; CHECK-NEXT:    ucvtf v7.4s, v7.4s
; CHECK-NEXT:    ucvtf v16.4s, v16.4s
; CHECK-NEXT:    ucvtf v17.4s, v3.4s
; CHECK-NEXT:    ucvtf v4.4s, v4.4s
; CHECK-NEXT:    ucvtf v18.4s, v0.4s
; CHECK-NEXT:    ucvtf v19.4s, v1.4s
; CHECK-NEXT:    ushr v0.4s, v5.4s, #16
; CHECK-NEXT:    ushr v3.4s, v6.4s, #16
; CHECK-NEXT:    ushr v1.4s, v7.4s, #16
; CHECK-NEXT:    ushr v20.4s, v16.4s, #16
; CHECK-NEXT:    ushr v23.4s, v17.4s, #16
; CHECK-NEXT:    ushr v24.4s, v4.4s, #16
; CHECK-NEXT:    ushr v22.4s, v18.4s, #16
; CHECK-NEXT:    ushr v25.4s, v19.4s, #16
; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
; CHECK-NEXT:    and v3.16b, v3.16b, v2.16b
; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
; CHECK-NEXT:    and v20.16b, v20.16b, v2.16b
; CHECK-NEXT:    and v23.16b, v23.16b, v2.16b
; CHECK-NEXT:    and v24.16b, v24.16b, v2.16b
; CHECK-NEXT:    and v22.16b, v22.16b, v2.16b
; CHECK-NEXT:    and v25.16b, v25.16b, v2.16b
; CHECK-NEXT:    add v0.4s, v0.4s, v21.4s
; CHECK-NEXT:    add v3.4s, v3.4s, v21.4s
; CHECK-NEXT:    add v26.4s, v1.4s, v21.4s
; CHECK-NEXT:    add v20.4s, v20.4s, v21.4s
; CHECK-NEXT:    addhn v1.4h, v5.4s, v0.4s
; CHECK-NEXT:    addhn v0.4h, v6.4s, v3.4s
; CHECK-NEXT:    addhn v3.4h, v7.4s, v26.4s
; CHECK-NEXT:    addhn v2.4h, v16.4s, v20.4s
; CHECK-NEXT:    add v5.4s, v22.4s, v21.4s
; CHECK-NEXT:    add v6.4s, v23.4s, v21.4s
; CHECK-NEXT:    add v7.4s, v24.4s, v21.4s
; CHECK-NEXT:    add v16.4s, v25.4s, v21.4s
; CHECK-NEXT:    addhn2 v0.8h, v18.4s, v5.4s
; CHECK-NEXT:    addhn2 v1.8h, v17.4s, v6.4s
; CHECK-NEXT:    addhn2 v3.8h, v4.4s, v7.4s
; CHECK-NEXT:    addhn2 v2.8h, v19.4s, v16.4s
; CHECK-NEXT:    ret
entry:
  %c = uitofp <32 x i8> %a to <32 x bfloat>
  ret <32 x bfloat> %c
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK-GI: {{.*}}
; CHECK-GI-FP16: {{.*}}
; CHECK-GI-NOFP16: {{.*}}
; CHECK-SD: {{.*}}
; CHECK-SD-FP16: {{.*}}
; CHECK-SD-NOFP16: {{.*}}