; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple armeb-eabi -mattr=armv8.2-a,neon,fullfp16 -target-abi=aapcs-gnu -float-abi hard -o - %s | FileCheck %s

; 64 bit conversions to v4f16
define void @conv_i64_to_v4f16( i64 %val, ptr %store ) {
; CHECK-LABEL: conv_i64_to_v4f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov d16, r1, r0
; CHECK-NEXT: vldr d17, [r2]
; CHECK-NEXT: vrev64.16 d16, d16
; CHECK-NEXT: vrev64.16 d17, d17
; CHECK-NEXT: vadd.f16 d16, d16, d17
; CHECK-NEXT: vrev64.16 d16, d16
; CHECK-NEXT: vstr d16, [r2]
; CHECK-NEXT: bx lr
entry:
  %v = bitcast i64 %val to <4 x half>
  %w = load <4 x half>, ptr %store
  %a = fadd <4 x half> %v, %w
  store <4 x half> %a, ptr %store
  ret void
}

define void @conv_f64_to_v4f16( double %val, ptr %store ) {
; CHECK-LABEL: conv_f64_to_v4f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr d16, [r0]
; CHECK-NEXT: vrev64.16 d17, d0
; CHECK-NEXT: vrev64.16 d16, d16
; CHECK-NEXT: vadd.f16 d16, d17, d16
; CHECK-NEXT: vrev64.16 d16, d16
; CHECK-NEXT: vstr d16, [r0]
; CHECK-NEXT: bx lr
entry:
  %v = bitcast double %val to <4 x half>
  %w = load <4 x half>, ptr %store
  %a = fadd <4 x half> %v, %w
  store <4 x half> %a, ptr %store
  ret void
}

define void @conv_v2f32_to_v4f16( <2 x float> %a, ptr %store ) {
; CHECK-LABEL: conv_v2f32_to_v4f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr d16, .LCPI2_0
; CHECK-NEXT: vrev64.32 d17, d0
; CHECK-NEXT: vrev64.32 d16, d16
; CHECK-NEXT: vadd.f32 d16, d17, d16
; CHECK-NEXT: vldr d17, [r0]
; CHECK-NEXT: vrev64.16 d17, d17
; CHECK-NEXT: vrev32.16 d16, d16
; CHECK-NEXT: vadd.f16 d16, d16, d17
; CHECK-NEXT: vrev64.16 d16, d16
; CHECK-NEXT: vstr d16, [r0]
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI2_0:
; CHECK-NEXT: .long 0xbf800000 @ float -1
; CHECK-NEXT: .long 0x3f800000 @ float 1
entry:
  %c = fadd <2 x float> %a, <float -1.0, float 1.0>
  %v = bitcast <2 x float> %c to <4 x half>
  %w = load <4 x half>, ptr %store
  %z = fadd <4 x half> %v, %w
  store <4 x half> %z, ptr %store
  ret void
}

define void @conv_v2i32_to_v4f16( <2 x i32> %a, ptr %store ) {
; CHECK-LABEL: conv_v2i32_to_v4f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr d16, .LCPI3_0
; CHECK-NEXT: vrev64.32 d17, d0
; CHECK-NEXT: vrev64.32 d16, d16
; CHECK-NEXT: vadd.i32 d16, d17, d16
; CHECK-NEXT: vldr d18, [r0]
; CHECK-NEXT: vrev64.16 d17, d18
; CHECK-NEXT: vrev32.16 d16, d16
; CHECK-NEXT: vadd.f16 d16, d16, d17
; CHECK-NEXT: vrev64.16 d16, d16
; CHECK-NEXT: vstr d16, [r0]
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI3_0:
; CHECK-NEXT: .long 1 @ 0x1
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
entry:
  %c = add <2 x i32> %a, <i32 1, i32 -1>
  %v = bitcast <2 x i32> %c to <4 x half>
  %w = load <4 x half>, ptr %store
  %z = fadd <4 x half> %v, %w
  store <4 x half> %z, ptr %store
  ret void
}

define void @conv_v4i16_to_v4f16( <4 x i16> %a, ptr %store ) {
; CHECK-LABEL: conv_v4i16_to_v4f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i64 d16, #0xffff00000000ffff
; CHECK-NEXT: vldr d17, [r0]
; CHECK-NEXT: vrev64.16 d18, d0
; CHECK-NEXT: vadd.i16 d16, d18, d16
; CHECK-NEXT: vrev64.16 d17, d17
; CHECK-NEXT: vadd.f16 d16, d16, d17
; CHECK-NEXT: vrev64.16 d16, d16
; CHECK-NEXT: vstr d16, [r0]
; CHECK-NEXT: bx lr
entry:
  %c = add <4 x i16> %a, <i16 -1, i16 0, i16 0, i16 -1>
  %v = bitcast <4 x i16> %c to <4 x half>
  %w = load <4 x half>, ptr %store
  %z = fadd <4 x half> %v, %w
  store <4 x half> %z, ptr %store
  ret void
}

define void @conv_v8i8_to_v4f16( <8 x i8> %a, ptr %store ) {
; CHECK-LABEL: conv_v8i8_to_v4f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i8 d16, #0x1
; CHECK-NEXT: vrev64.8 d17, d0
; CHECK-NEXT: vldr d18, [r0]
; CHECK-NEXT: vadd.i8 d16, d17, d16
; CHECK-NEXT: vrev64.16 d17, d18
; CHECK-NEXT: vrev16.8 d16, d16
; CHECK-NEXT: vadd.f16 d16, d16, d17
; CHECK-NEXT: vrev64.16 d16, d16
; CHECK-NEXT: vstr d16, [r0]
; CHECK-NEXT: bx lr
entry:
  %c = add <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %v = bitcast <8 x i8> %c to <4 x half>
  %w = load <4 x half>, ptr %store
  %z = fadd <4 x half> %v, %w
  store <4 x half> %z, ptr %store
  ret void
}

define void @conv_v2i64_to_v8f16( <2 x i64> %val, ptr %store ) {
; CHECK-LABEL: conv_v2i64_to_v8f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
; CHECK-NEXT: adr r1, .LCPI6_0
; CHECK-NEXT: vld1.64 {d18, d19}, [r1:128]
; CHECK-NEXT: vadd.i64 q9, q0, q9
; CHECK-NEXT: vrev64.16 q8, q8
; CHECK-NEXT: vrev64.16 q9, q9
; CHECK-NEXT: vadd.f16 q8, q9, q8
; CHECK-NEXT: vrev64.16 q8, q8
; CHECK-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI6_0:
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 1 @ 0x1
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
entry:
  %v = add <2 x i64> %val, <i64 1, i64 -1>
  %v1 = bitcast <2 x i64> %v to <8 x half>
  %w = load <8 x half>, ptr %store
  %a = fadd <8 x half> %v1, %w
  store <8 x half> %a, ptr %store
  ret void
}
define void @conv_v2f64_to_v8f16( <2 x double> %val, ptr %store ) {
; CHECK-LABEL: conv_v2f64_to_v8f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f64 d16, #-1.000000e+00
; CHECK-NEXT: vmov.f64 d17, #1.000000e+00
; CHECK-NEXT: vadd.f64 d19, d1, d16
; CHECK-NEXT: vadd.f64 d18, d0, d17
; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
; CHECK-NEXT: vrev64.16 q8, q8
; CHECK-NEXT: vrev64.16 q9, q9
; CHECK-NEXT: vadd.f16 q8, q9, q8
; CHECK-NEXT: vrev64.16 q8, q8
; CHECK-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-NEXT: bx lr
entry:
  %v = fadd <2 x double> %val, <double 1.0, double -1.0>
  %v1 = bitcast <2 x double> %v to <8 x half>
  %w = load <8 x half>, ptr %store
  %a = fadd <8 x half> %v1, %w
  store <8 x half> %a, ptr %store
  ret void
}

define void @conv_v4f32_to_v8f16( <4 x float> %a, ptr %store ) {
; CHECK-LABEL: conv_v4f32_to_v8f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r1, .LCPI8_0
; CHECK-NEXT: vrev64.32 q9, q0
; CHECK-NEXT: vld1.64 {d16, d17}, [r1:128]
; CHECK-NEXT: vrev64.32 q8, q8
; CHECK-NEXT: vadd.f32 q8, q9, q8
; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
; CHECK-NEXT: vrev64.16 q9, q9
; CHECK-NEXT: vrev32.16 q8, q8
; CHECK-NEXT: vadd.f16 q8, q8, q9
; CHECK-NEXT: vrev64.16 q8, q8
; CHECK-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI8_0:
; CHECK-NEXT: .long 0xbf800000 @ float -1
; CHECK-NEXT: .long 0x3f800000 @ float 1
; CHECK-NEXT: .long 0xbf800000 @ float -1
; CHECK-NEXT: .long 0x3f800000 @ float 1
entry:
  %c = fadd <4 x float> %a, <float -1.0, float 1.0, float -1.0, float 1.0>
  %v = bitcast <4 x float> %c to <8 x half>
  %w = load <8 x half>, ptr %store
  %z = fadd <8 x half> %v, %w
  store <8 x half> %z, ptr %store
  ret void
}

define void @conv_v4i32_to_v8f16( <4 x i32> %a, ptr %store ) {
; CHECK-LABEL: conv_v4i32_to_v8f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r1, .LCPI9_0
; CHECK-NEXT: vrev64.32 q9, q0
; CHECK-NEXT: vld1.64 {d16, d17}, [r1:128]
; CHECK-NEXT: vrev64.32 q8, q8
; CHECK-NEXT: vadd.i32 q8, q9, q8
; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
; CHECK-NEXT: vrev64.16 q9, q10
; CHECK-NEXT: vrev32.16 q8, q8
; CHECK-NEXT: vadd.f16 q8, q8, q9
; CHECK-NEXT: vrev64.16 q8, q8
; CHECK-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI9_0:
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
; CHECK-NEXT: .long 1 @ 0x1
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
; CHECK-NEXT: .long 1 @ 0x1
entry:
  %c = add <4 x i32> %a, <i32 -1, i32 1, i32 -1, i32 1>
  %v = bitcast <4 x i32> %c to <8 x half>
  %w = load <8 x half>, ptr %store
  %z = fadd <8 x half> %v, %w
  store <8 x half> %z, ptr %store
  ret void
}

define void @conv_v8i16_to_v8f16( <8 x i16> %a, ptr %store ) {
; CHECK-LABEL: conv_v8i16_to_v8f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r1, .LCPI10_0
; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
; CHECK-NEXT: vld1.64 {d16, d17}, [r1:128]
; CHECK-NEXT: vrev64.16 q10, q0
; CHECK-NEXT: vrev64.16 q8, q8
; CHECK-NEXT: vrev64.16 q9, q9
; CHECK-NEXT: vadd.i16 q8, q10, q8
; CHECK-NEXT: vadd.f16 q8, q8, q9
; CHECK-NEXT: vrev64.16 q8, q8
; CHECK-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI10_0:
; CHECK-NEXT: .short 65535 @ 0xffff
; CHECK-NEXT: .short 1 @ 0x1
; CHECK-NEXT: .short 0 @ 0x0
; CHECK-NEXT: .short 7 @ 0x7
; CHECK-NEXT: .short 65535 @ 0xffff
; CHECK-NEXT: .short 1 @ 0x1
; CHECK-NEXT: .short 0 @ 0x0
; CHECK-NEXT: .short 7 @ 0x7
entry:
  %c = add <8 x i16> %a, <i16 -1, i16 1, i16 0, i16 7, i16 -1, i16 1, i16 0, i16 7>
  %v = bitcast <8 x i16> %c to <8 x half>
  %w = load <8 x half>, ptr %store
  %z = fadd <8 x half> %v, %w
  store <8 x half> %z, ptr %store
  ret void
}

define void @conv_v16i8_to_v8f16( <16 x i8> %a, ptr %store ) {
; CHECK-LABEL: conv_v16i8_to_v8f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vrev64.8 q8, q0
; CHECK-NEXT: vmov.i8 q9, #0x1
; CHECK-NEXT: vadd.i8 q8, q8, q9
; CHECK-NEXT: vld1.64 {d20, d21}, [r0]
; CHECK-NEXT: vrev64.16 q9, q10
; CHECK-NEXT: vrev16.8 q8, q8
; CHECK-NEXT: vadd.f16 q8, q8, q9
; CHECK-NEXT: vrev64.16 q8, q8
; CHECK-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-NEXT: bx lr
entry:
  %c = add <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %v = bitcast <16 x i8> %c to <8 x half>
  %w = load <8 x half>, ptr %store
  %z = fadd <8 x half> %v, %w
  store <8 x half> %z, ptr %store
  ret void
}

define void @conv_v4f16_to_i64( <4 x half> %a, ptr %store ) {
; CHECK-LABEL: conv_v4f16_to_i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr d16, .LCPI12_0
; CHECK-NEXT: vrev64.16 d17, d0
; CHECK-NEXT: vrev64.16 d16, d16
; CHECK-NEXT: vadd.f16 d16, d17, d16
; CHECK-NEXT: vrev64.16 d16, d16
; CHECK-NEXT: vmov r1, r2, d16
; CHECK-NEXT: subs r1, r1, #1
; CHECK-NEXT: sbc r2, r2, #0
; CHECK-NEXT: str r2, [r0]
; CHECK-NEXT: str r1, [r0, #4]
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI12_0:
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
entry:
  %z = fadd <4 x half> %a, <half -1.0, half 1.0, half -1.0, half 1.0>
  %y = bitcast <4 x half> %z to i64
  %w = add i64 %y, -1
  store i64 %w, ptr %store
  ret void
}

define void @conv_v4f16_to_f64( <4 x half> %a, ptr %store ) {
; CHECK-LABEL: conv_v4f16_to_f64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr d16, .LCPI13_0
; CHECK-NEXT: vrev64.16 d17, d0
; CHECK-NEXT: vrev64.16 d16, d16
; CHECK-NEXT: vadd.f16 d16, d17, d16
; CHECK-NEXT: vmov.f64 d17, #-1.000000e+00
; CHECK-NEXT: vrev64.16 d16, d16
; CHECK-NEXT: vadd.f64 d16, d16, d17
; CHECK-NEXT: vstr d16, [r0]
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI13_0:
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
entry:
  %z = fadd <4 x half> %a, <half -1.0, half 1.0, half -1.0, half 1.0>
  %y = bitcast <4 x half> %z to double
  %w = fadd double %y, -1.0
  store double %w, ptr %store
  ret void
}

define void @conv_v4f16_to_v2i32( <4 x half> %a, ptr %store ) {
; CHECK-LABEL: conv_v4f16_to_v2i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr d16, .LCPI14_0
; CHECK-NEXT: vrev64.16 d17, d0
; CHECK-NEXT: vrev64.16 d16, d16
; CHECK-NEXT: vadd.f16 d16, d17, d16
; CHECK-NEXT: vldr d17, .LCPI14_1
; CHECK-NEXT: vrev64.32 d17, d17
; CHECK-NEXT: vrev32.16 d16, d16
; CHECK-NEXT: vadd.i32 d16, d16, d17
; CHECK-NEXT: vrev64.32 d16, d16
; CHECK-NEXT: vstr d16, [r0]
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI14_0:
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
; CHECK-NEXT: .LCPI14_1:
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
; CHECK-NEXT: .long 1 @ 0x1
entry:
  %z = fadd <4 x half> %a, <half -1.0, half 1.0, half -1.0, half 1.0>
  %y = bitcast <4 x half> %z to <2 x i32>
  %w = add <2 x i32> %y, <i32 -1, i32 1>
  store <2 x i32> %w, ptr %store
  ret void
}

define void @conv_v4f16_to_v2f32( <4 x half> %a, ptr %store ) {
; CHECK-LABEL: conv_v4f16_to_v2f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr d16, .LCPI15_0
; CHECK-NEXT: vrev64.16 d17, d0
; CHECK-NEXT: vrev64.16 d16, d16
; CHECK-NEXT: vadd.f16 d16, d17, d16
; CHECK-NEXT: vldr d17, .LCPI15_1
; CHECK-NEXT: vrev64.32 d17, d17
; CHECK-NEXT: vrev32.16 d16, d16
; CHECK-NEXT: vadd.f32 d16, d16, d17
; CHECK-NEXT: vrev64.32 d16, d16
; CHECK-NEXT: vstr d16, [r0]
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI15_0:
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
; CHECK-NEXT: .LCPI15_1:
; CHECK-NEXT: .long 0xbf800000 @ float -1
; CHECK-NEXT: .long 0x3f800000 @ float 1
entry:
  %z = fadd <4 x half> %a, <half -1.0, half 1.0, half -1.0, half 1.0>
  %y = bitcast <4 x half> %z to <2 x float>
  %w = fadd <2 x float> %y, <float -1.0, float 1.0>
  store <2 x float> %w, ptr %store
  ret void
}

define void @conv_v4f16_to_v4i16( <4 x half> %a, ptr %store ) {
; CHECK-LABEL: conv_v4f16_to_v4i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr d16, .LCPI16_0
; CHECK-NEXT: vrev64.16 d17, d0
; CHECK-NEXT: vrev64.16 d16, d16
; CHECK-NEXT: vadd.f16 d16, d17, d16
; CHECK-NEXT: vldr d17, .LCPI16_1
; CHECK-NEXT: vrev64.16 d17, d17
; CHECK-NEXT: vadd.i16 d16, d16, d17
; CHECK-NEXT: vrev64.16 d16, d16
; CHECK-NEXT: vstr d16, [r0]
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI16_0:
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
; CHECK-NEXT: .LCPI16_1:
; CHECK-NEXT: .short 65535 @ 0xffff
; CHECK-NEXT: .short 1 @ 0x1
; CHECK-NEXT: .short 0 @ 0x0
; CHECK-NEXT: .short 7 @ 0x7
entry:
  %z = fadd <4 x half> %a, <half -1.0, half 1.0, half -1.0, half 1.0>
  %y = bitcast <4 x half> %z to <4 x i16>
  %w = add <4 x i16> %y, <i16 -1, i16 1, i16 0, i16 7>
  store <4 x i16> %w, ptr %store
  ret void
}

define void @conv_v4f16_to_v8f8( <4 x half> %a, ptr %store ) {
; CHECK-LABEL: conv_v4f16_to_v8f8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr d16, .LCPI17_0
; CHECK-NEXT: vrev64.16 d17, d0
; CHECK-NEXT: vrev64.16 d16, d16
; CHECK-NEXT: vadd.f16 d16, d17, d16
; CHECK-NEXT: vmov.i8 d17, #0x1
; CHECK-NEXT: vrev16.8 d16, d16
; CHECK-NEXT: vadd.i8 d16, d16, d17
; CHECK-NEXT: vrev64.8 d16, d16
; CHECK-NEXT: vstr d16, [r0]
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI17_0:
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
entry:
  %z = fadd <4 x half> %a, <half -1.0, half 1.0, half -1.0, half 1.0>
  %y = bitcast <4 x half> %z to <8 x i8>
  %w = add <8 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  store <8 x i8> %w, ptr %store
  ret void
}

define void @conv_v8f16_to_i128( <8 x half> %a, ptr %store ) {
; CHECK-LABEL: conv_v8f16_to_i128:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: adr r1, .LCPI18_0
; CHECK-NEXT: vrev64.16 q9, q0
; CHECK-NEXT: vld1.64 {d16, d17}, [r1:128]
; CHECK-NEXT: vrev64.16 q8, q8
; CHECK-NEXT: vadd.f16 q8, q9, q8
; CHECK-NEXT: vrev32.16 q8, q8
; CHECK-NEXT: vmov r12, r2, d17
; CHECK-NEXT: vmov r3, r1, d16
; CHECK-NEXT: subs lr, r2, #1
; CHECK-NEXT: sbcs r2, r12, #0
; CHECK-NEXT: sbcs r1, r1, #0
; CHECK-NEXT: sbc r3, r3, #0
; CHECK-NEXT: str r3, [r0]
; CHECK-NEXT: stmib r0, {r1, r2, lr}
; CHECK-NEXT: pop {r11, pc}
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI18_0:
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
entry:
  %z = fadd <8 x half> %a, <half -1.0, half 1.0, half -1.0, half 1.0, half -1.0, half 1.0, half -1.0, half 1.0>
  %y = bitcast <8 x half> %z to i128
  %w = add i128 %y, -1
  store i128 %w, ptr %store
  ret void
}

define void @conv_v8f16_to_v2f64( <8 x half> %a, ptr %store ) {
; CHECK-LABEL: conv_v8f16_to_v2f64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r1, .LCPI19_0
; CHECK-NEXT: vrev64.16 q9, q0
; CHECK-NEXT: vld1.64 {d16, d17}, [r1:128]
; CHECK-NEXT: vrev64.16 q8, q8
; CHECK-NEXT: vadd.f16 q8, q9, q8
; CHECK-NEXT: vmov.f64 d18, #1.000000e+00
; CHECK-NEXT: vrev64.16 q8, q8
; CHECK-NEXT: vmov.f64 d19, #-1.000000e+00
; CHECK-NEXT: vadd.f64 d21, d17, d18
; CHECK-NEXT: vadd.f64 d20, d16, d19
; CHECK-NEXT: vst1.64 {d20, d21}, [r0]
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI19_0:
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
entry:
  %z = fadd <8 x half> %a, <half -1.0, half 1.0, half -1.0, half 1.0, half -1.0, half 1.0, half -1.0, half 1.0>
  %y = bitcast <8 x half> %z to <2 x double>
  %w = fadd <2 x double> %y, <double -1.0, double 1.0>
  store <2 x double> %w, ptr %store
  ret void
}

define void @conv_v8f16_to_v4i32( <8 x half> %a, ptr %store ) {
; CHECK-LABEL: conv_v8f16_to_v4i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r1, .LCPI20_0
; CHECK-NEXT: vrev64.16 q9, q0
; CHECK-NEXT: vld1.64 {d16, d17}, [r1:128]
; CHECK-NEXT: adr r1, .LCPI20_1
; CHECK-NEXT: vrev64.16 q8, q8
; CHECK-NEXT: vadd.f16 q8, q9, q8
; CHECK-NEXT: vld1.64 {d18, d19}, [r1:128]
; CHECK-NEXT: vrev64.32 q9, q9
; CHECK-NEXT: vrev32.16 q8, q8
; CHECK-NEXT: vadd.i32 q8, q8, q9
; CHECK-NEXT: vrev64.32 q8, q8
; CHECK-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI20_0:
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
; CHECK-NEXT: .LCPI20_1:
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
; CHECK-NEXT: .long 1 @ 0x1
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
; CHECK-NEXT: .long 1 @ 0x1
entry:
  %z = fadd <8 x half> %a, <half -1.0, half 1.0, half -1.0, half 1.0, half -1.0, half 1.0, half -1.0, half 1.0>
  %y = bitcast <8 x half> %z to <4 x i32>
  %w = add <4 x i32> %y, <i32 -1, i32 1, i32 -1, i32 1>
  store <4 x i32> %w, ptr %store
  ret void
}

define void @conv_v8f16_to_v4f32( <8 x half> %a, ptr %store ) {
; CHECK-LABEL: conv_v8f16_to_v4f32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r1, .LCPI21_0
; CHECK-NEXT: vrev64.16 q9, q0
; CHECK-NEXT: vld1.64 {d16, d17}, [r1:128]
; CHECK-NEXT: adr r1, .LCPI21_1
; CHECK-NEXT: vrev64.16 q8, q8
; CHECK-NEXT: vadd.f16 q8, q9, q8
; CHECK-NEXT: vld1.64 {d18, d19}, [r1:128]
; CHECK-NEXT: vrev64.32 q9, q9
; CHECK-NEXT: vrev32.16 q8, q8
; CHECK-NEXT: vadd.f32 q8, q8, q9
; CHECK-NEXT: vrev64.32 q8, q8
; CHECK-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI21_0:
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
; CHECK-NEXT: .LCPI21_1:
; CHECK-NEXT: .long 0xbf800000 @ float -1
; CHECK-NEXT: .long 0x3f800000 @ float 1
; CHECK-NEXT: .long 0xbf800000 @ float -1
; CHECK-NEXT: .long 0x3f800000 @ float 1
entry:
  %z = fadd <8 x half> %a, <half -1.0, half 1.0, half -1.0, half 1.0, half -1.0, half 1.0, half -1.0, half 1.0>
  %y = bitcast <8 x half> %z to <4 x float>
  %w = fadd <4 x float> %y, <float -1.0, float 1.0, float -1.0, float 1.0>
  store <4 x float> %w, ptr %store
  ret void
}

define void @conv_v8f16_to_v8i16( <8 x half> %a, ptr %store ) {
; CHECK-LABEL: conv_v8f16_to_v8i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r1, .LCPI22_0
; CHECK-NEXT: vrev64.16 q9, q0
; CHECK-NEXT: vld1.64 {d16, d17}, [r1:128]
; CHECK-NEXT: adr r1, .LCPI22_1
; CHECK-NEXT: vrev64.16 q8, q8
; CHECK-NEXT: vadd.f16 q8, q9, q8
; CHECK-NEXT: vld1.64 {d18, d19}, [r1:128]
; CHECK-NEXT: vrev64.16 q9, q9
; CHECK-NEXT: vadd.i16 q8, q8, q9
; CHECK-NEXT: vrev64.16 q8, q8
; CHECK-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI22_0:
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
; CHECK-NEXT: .LCPI22_1:
; CHECK-NEXT: .short 65535 @ 0xffff
; CHECK-NEXT: .short 1 @ 0x1
; CHECK-NEXT: .short 0 @ 0x0
; CHECK-NEXT: .short 7 @ 0x7
; CHECK-NEXT: .short 65535 @ 0xffff
; CHECK-NEXT: .short 1 @ 0x1
; CHECK-NEXT: .short 0 @ 0x0
; CHECK-NEXT: .short 7 @ 0x7
entry:
  %z = fadd <8 x half> %a, <half -1.0, half 1.0, half -1.0, half 1.0, half -1.0, half 1.0, half -1.0, half 1.0>
  %y = bitcast <8 x half> %z to <8 x i16>
  %w = add <8 x i16> %y, <i16 -1, i16 1, i16 0, i16 7, i16 -1, i16 1, i16 0, i16 7>
  store <8 x i16> %w, ptr %store
  ret void
}

define void @conv_v8f16_to_v8f8( <8 x half> %a, ptr %store ) {
; CHECK-LABEL: conv_v8f16_to_v8f8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r1, .LCPI23_0
; CHECK-NEXT: vrev64.16 q9, q0
; CHECK-NEXT: vld1.64 {d16, d17}, [r1:128]
; CHECK-NEXT: vrev64.16 q8, q8
; CHECK-NEXT: vadd.f16 q8, q9, q8
; CHECK-NEXT: vmov.i8 q9, #0x1
; CHECK-NEXT: vrev16.8 q8, q8
; CHECK-NEXT: vadd.i8 q8, q8, q9
; CHECK-NEXT: vrev64.8 q8, q8
; CHECK-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI23_0:
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
; CHECK-NEXT: .short 0xbc00 @ half -1
; CHECK-NEXT: .short 0x3c00 @ half 1
entry:
  %z = fadd <8 x half> %a, <half -1.0, half 1.0, half -1.0, half 1.0, half -1.0, half 1.0, half -1.0, half 1.0>
  %y = bitcast <8 x half> %z to <16 x i8>
  %w = add <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  store <16 x i8> %w, ptr %store
  ret void
}