; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 < %s | FileCheck -check-prefix=NOFP16 %s

; Tests ABI promotion of half (f16) scalar and vector values under strictfp
; when the target has no FP unit (-fp-armv8): half values are passed/returned
; in GPRs and converted through the __gnu_h2f_ieee / __gnu_f2h_ieee libcalls.

declare void @f16_user(half)
declare half @f16_result()

declare void @v2f16_user(<2 x half>)
declare <2 x half> @v2f16_result()

declare void @v4f16_user(<4 x half>)
declare <4 x half> @v4f16_result()

declare void @v8f16_user(<8 x half>)
declare <8 x half> @v8f16_result()

define void @f16_arg(half %arg, ptr %ptr) #0 {
; NOFP16-LABEL: f16_arg:
; NOFP16:       // %bb.0:
; NOFP16-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; NOFP16-NEXT:    .cfi_def_cfa_offset 16
; NOFP16-NEXT:    .cfi_offset w19, -8
; NOFP16-NEXT:    .cfi_offset w30, -16
; NOFP16-NEXT:    and w0, w0, #0xffff
; NOFP16-NEXT:    mov x19, x1
; NOFP16-NEXT:    bl __gnu_h2f_ieee
; NOFP16-NEXT:    str w0, [x19]
; NOFP16-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; NOFP16-NEXT:    ret
  %fpext = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg, metadata !"fpexcept.strict")
  store float %fpext, ptr %ptr
  ret void
}

define void @v2f16_arg(<2 x half> %arg, ptr %ptr) #0 {
; NOFP16-LABEL: v2f16_arg:
; NOFP16:       // %bb.0:
; NOFP16-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
; NOFP16-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
; NOFP16-NEXT:    .cfi_def_cfa_offset 32
; NOFP16-NEXT:    .cfi_offset w19, -8
; NOFP16-NEXT:    .cfi_offset w20, -16
; NOFP16-NEXT:    .cfi_offset w21, -24
; NOFP16-NEXT:    .cfi_offset w30, -32
; NOFP16-NEXT:    and w0, w0, #0xffff
; NOFP16-NEXT:    mov x19, x2
; NOFP16-NEXT:    mov w20, w1
; NOFP16-NEXT:    bl __gnu_h2f_ieee
; NOFP16-NEXT:    mov w21, w0
; NOFP16-NEXT:    and w0, w20, #0xffff
; NOFP16-NEXT:    bl __gnu_h2f_ieee
; NOFP16-NEXT:    stp w21, w0, [x19]
; NOFP16-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; NOFP16-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; NOFP16-NEXT:    ret
  %fpext = call <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half> %arg, metadata !"fpexcept.strict")
  store <2 x float> %fpext, ptr %ptr
  ret void
}

define void @v3f16_arg(<3 x half> %arg, ptr %ptr) #0 {
; NOFP16-LABEL: v3f16_arg:
; NOFP16:       // %bb.0:
; NOFP16-NEXT:    str x30, [sp, #-48]! // 8-byte Folded Spill
; NOFP16-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
; NOFP16-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
; NOFP16-NEXT:    .cfi_def_cfa_offset 48
; NOFP16-NEXT:    .cfi_offset w19, -8
; NOFP16-NEXT:    .cfi_offset w20, -16
; NOFP16-NEXT:    .cfi_offset w21, -24
; NOFP16-NEXT:    .cfi_offset w22, -32
; NOFP16-NEXT:    .cfi_offset w30, -48
; NOFP16-NEXT:    mov w21, w0
; NOFP16-NEXT:    and w0, w1, #0xffff
; NOFP16-NEXT:    mov x19, x3
; NOFP16-NEXT:    mov w20, w2
; NOFP16-NEXT:    bl __gnu_h2f_ieee
; NOFP16-NEXT:    mov w22, w0
; NOFP16-NEXT:    and w0, w21, #0xffff
; NOFP16-NEXT:    bl __gnu_h2f_ieee
; NOFP16-NEXT:    mov w8, w0
; NOFP16-NEXT:    and w0, w20, #0xffff
; NOFP16-NEXT:    orr x21, x8, x22, lsl #32
; NOFP16-NEXT:    bl __gnu_h2f_ieee
; NOFP16-NEXT:    str x21, [x19]
; NOFP16-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
; NOFP16-NEXT:    str w0, [x19, #8]
; NOFP16-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; NOFP16-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
; NOFP16-NEXT:    ret
  %fpext = call <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half> %arg, metadata !"fpexcept.strict")
  store <3 x float> %fpext, ptr %ptr
  ret void
}

define void @v4f16_arg(<4 x half> %arg, ptr %ptr) #0 {
; NOFP16-LABEL: v4f16_arg:
; NOFP16:       // %bb.0:
; NOFP16-NEXT:    stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
; NOFP16-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
; NOFP16-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
; NOFP16-NEXT:    .cfi_def_cfa_offset 48
; NOFP16-NEXT:    .cfi_offset w19, -8
; NOFP16-NEXT:    .cfi_offset w20, -16
; NOFP16-NEXT:    .cfi_offset w21, -24
; NOFP16-NEXT:    .cfi_offset w22, -32
; NOFP16-NEXT:    .cfi_offset w23, -40
; NOFP16-NEXT:    .cfi_offset w30, -48
; NOFP16-NEXT:    and w0, w0, #0xffff
; NOFP16-NEXT:    mov x19, x4
; NOFP16-NEXT:    mov w20, w3
; NOFP16-NEXT:    mov w21, w2
; NOFP16-NEXT:    mov w22, w1
; NOFP16-NEXT:    bl __gnu_h2f_ieee
; NOFP16-NEXT:    mov w23, w0
; NOFP16-NEXT:    and w0, w22, #0xffff
; NOFP16-NEXT:    bl __gnu_h2f_ieee
; NOFP16-NEXT:    mov w22, w0
; NOFP16-NEXT:    and w0, w21, #0xffff
; NOFP16-NEXT:    bl __gnu_h2f_ieee
; NOFP16-NEXT:    mov w21, w0
; NOFP16-NEXT:    and w0, w20, #0xffff
; NOFP16-NEXT:    bl __gnu_h2f_ieee
; NOFP16-NEXT:    stp w21, w0, [x19, #8]
; NOFP16-NEXT:    stp w23, w22, [x19]
; NOFP16-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; NOFP16-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
; NOFP16-NEXT:    ldp x30, x23, [sp], #48 // 16-byte Folded Reload
; NOFP16-NEXT:    ret
  %fpext = call <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half> %arg, metadata !"fpexcept.strict")
  store <4 x float> %fpext, ptr %ptr
  ret void
}

define half @f16_return(float %arg) #0 {
; NOFP16-LABEL: f16_return:
; NOFP16:       // %bb.0:
; NOFP16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; NOFP16-NEXT:    .cfi_def_cfa_offset 16
; NOFP16-NEXT:    .cfi_offset w30, -16
; NOFP16-NEXT:    bl __gnu_f2h_ieee
; NOFP16-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; NOFP16-NEXT:    ret
  %fptrunc = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret half %fptrunc
}

define <2 x half> @v2f16_return(<2 x float> %arg) #0 {
; NOFP16-LABEL: v2f16_return:
; NOFP16:       // %bb.0:
; NOFP16-NEXT:    str x30, [sp, #-32]! // 8-byte Folded Spill
; NOFP16-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
; NOFP16-NEXT:    .cfi_def_cfa_offset 32
; NOFP16-NEXT:    .cfi_offset w19, -8
; NOFP16-NEXT:    .cfi_offset w20, -16
; NOFP16-NEXT:    .cfi_offset w30, -32
; NOFP16-NEXT:    mov w19, w0
; NOFP16-NEXT:    mov w0, w1
; NOFP16-NEXT:    bl __gnu_f2h_ieee
; NOFP16-NEXT:    mov w20, w0
; NOFP16-NEXT:    mov w0, w19
; NOFP16-NEXT:    bl __gnu_f2h_ieee
; NOFP16-NEXT:    mov w1, w20
; NOFP16-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; NOFP16-NEXT:    ldr x30, [sp], #32 // 8-byte Folded Reload
; NOFP16-NEXT:    ret
  %fptrunc = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <2 x half> %fptrunc
}

define <3 x half> @v3f16_return(<3 x float> %arg) #0 {
; NOFP16-LABEL: v3f16_return:
; NOFP16:       // %bb.0:
; NOFP16-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
; NOFP16-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
; NOFP16-NEXT:    .cfi_def_cfa_offset 32
; NOFP16-NEXT:    .cfi_offset w19, -8
; NOFP16-NEXT:    .cfi_offset w20, -16
; NOFP16-NEXT:    .cfi_offset w21, -24
; NOFP16-NEXT:    .cfi_offset w30, -32
; NOFP16-NEXT:    mov w20, w0
; NOFP16-NEXT:    mov w0, w2
; NOFP16-NEXT:    mov w19, w1
; NOFP16-NEXT:    bl __gnu_f2h_ieee
; NOFP16-NEXT:    mov w21, w0
; NOFP16-NEXT:    mov w0, w19
; NOFP16-NEXT:    bl __gnu_f2h_ieee
; NOFP16-NEXT:    mov w19, w0
; NOFP16-NEXT:    mov w0, w20
; NOFP16-NEXT:    bl __gnu_f2h_ieee
; NOFP16-NEXT:    mov w1, w19
; NOFP16-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; NOFP16-NEXT:    mov w2, w21
; NOFP16-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
; NOFP16-NEXT:    ret
  %fptrunc = call <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <3 x half> %fptrunc
}

define <4 x half> @v4f16_return(<4 x float> %arg) #0 {
; NOFP16-LABEL: v4f16_return:
; NOFP16:       // %bb.0:
; NOFP16-NEXT:    str x30, [sp, #-48]! // 8-byte Folded Spill
; NOFP16-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
; NOFP16-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
; NOFP16-NEXT:    .cfi_def_cfa_offset 48
; NOFP16-NEXT:    .cfi_offset w19, -8
; NOFP16-NEXT:    .cfi_offset w20, -16
; NOFP16-NEXT:    .cfi_offset w21, -24
; NOFP16-NEXT:    .cfi_offset w22, -32
; NOFP16-NEXT:    .cfi_offset w30, -48
; NOFP16-NEXT:    mov w21, w0
; NOFP16-NEXT:    mov w0, w3
; NOFP16-NEXT:    mov w19, w2
; NOFP16-NEXT:    mov w20, w1
; NOFP16-NEXT:    bl __gnu_f2h_ieee
; NOFP16-NEXT:    mov w22, w0
; NOFP16-NEXT:    mov w0, w19
; NOFP16-NEXT:    bl __gnu_f2h_ieee
; NOFP16-NEXT:    mov w19, w0
; NOFP16-NEXT:    mov w0, w20
; NOFP16-NEXT:    bl __gnu_f2h_ieee
; NOFP16-NEXT:    mov w20, w0
; NOFP16-NEXT:    mov w0, w21
; NOFP16-NEXT:    bl __gnu_f2h_ieee
; NOFP16-NEXT:    mov w1, w20
; NOFP16-NEXT:    mov w2, w19
; NOFP16-NEXT:    mov w3, w22
; NOFP16-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; NOFP16-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
; NOFP16-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
; NOFP16-NEXT:    ret
  %fptrunc = call <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <4 x half> %fptrunc
}

; FIXME:
; define void @outgoing_f16_arg(ptr %ptr) #0 {
;   %val = load half, ptr %ptr
;   call void @f16_user(half %val)
;   ret void
; }

; define void @outgoing_v2f16_arg(ptr %ptr) #0 {
;   %val = load <2 x half>, ptr %ptr
;   call void @v2f16_user(<2 x half> %val)
;   ret void
; }

; define void @outgoing_f16_return(ptr %ptr) #0 {
;   %val = call half @f16_result()
;   store half %val, ptr %ptr
;   ret void
; }

; define void @outgoing_v2f16_return(ptr %ptr) #0 {
;   %val = call <2 x half> @v2f16_result()
;   store <2 x half> %val, ptr %ptr
;   ret void
; }

define void @outgoing_v4f16_return(ptr %ptr) #0 {
; NOFP16-LABEL: outgoing_v4f16_return:
; NOFP16:       // %bb.0:
; NOFP16-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; NOFP16-NEXT:    .cfi_def_cfa_offset 16
; NOFP16-NEXT:    .cfi_offset w19, -8
; NOFP16-NEXT:    .cfi_offset w30, -16
; NOFP16-NEXT:    mov x19, x0
; NOFP16-NEXT:    bl v4f16_result
; NOFP16-NEXT:    strh w2, [x19, #4]
; NOFP16-NEXT:    strh w3, [x19, #6]
; NOFP16-NEXT:    strh w1, [x19, #2]
; NOFP16-NEXT:    strh w0, [x19]
; NOFP16-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; NOFP16-NEXT:    ret
  %val = call <4 x half> @v4f16_result() #0
  store <4 x half> %val, ptr %ptr
  ret void
}

define void @outgoing_v8f16_return(ptr %ptr) #0 {
; NOFP16-LABEL: outgoing_v8f16_return:
; NOFP16:       // %bb.0:
; NOFP16-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; NOFP16-NEXT:    .cfi_def_cfa_offset 16
; NOFP16-NEXT:    .cfi_offset w19, -8
; NOFP16-NEXT:    .cfi_offset w30, -16
; NOFP16-NEXT:    mov x19, x0
; NOFP16-NEXT:    bl v8f16_result
; NOFP16-NEXT:    strh w5, [x19, #10]
; NOFP16-NEXT:    strh w7, [x19, #14]
; NOFP16-NEXT:    strh w6, [x19, #12]
; NOFP16-NEXT:    strh w4, [x19, #8]
; NOFP16-NEXT:    strh w3, [x19, #6]
; NOFP16-NEXT:    strh w2, [x19, #4]
; NOFP16-NEXT:    strh w1, [x19, #2]
; NOFP16-NEXT:    strh w0, [x19]
; NOFP16-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; NOFP16-NEXT:    ret
  %val = call <8 x half> @v8f16_result() #0
  store <8 x half> %val, ptr %ptr
  ret void
}

define half @call_split_type_used_outside_block_v8f16() #0 {
; NOFP16-LABEL: call_split_type_used_outside_block_v8f16:
; NOFP16:       // %bb.0: // %bb0
; NOFP16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; NOFP16-NEXT:    .cfi_def_cfa_offset 16
; NOFP16-NEXT:    .cfi_offset w30, -16
; NOFP16-NEXT:    bl v8f16_result
; NOFP16-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; NOFP16-NEXT:    ret
bb0:
  %split.ret.type = call <8 x half> @v8f16_result() #0
  br label %bb1

bb1:
  %extract = extractelement <8 x half> %split.ret.type, i32 0
  ret half %extract
}

declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata) #0
declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half>, metadata) #0
declare <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half>, metadata) #0
declare <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half>, metadata) #0

declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata) #0
declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float>, metadata, metadata) #0
declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float>, metadata, metadata) #0
declare <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float>, metadata, metadata) #0

attributes #0 = { strictfp }