; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 < %s | FileCheck %s

; These are the half-precision counterparts of the float/double tests
; found in fp-logic.ll.

; Group: one FP operand and one integer operand, integer result.
; The half is reinterpreted as i16 and masked with the integer argument.

define i16 @f1(half %x, i16 %y) {
; CHECK-LABEL: f1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovw %xmm0, %eax
; CHECK-NEXT:    andl %edi, %eax
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    retq
  %x.bits = bitcast half %x to i16
  %masked = and i16 %x.bits, %y
  ret i16 %masked
}

; Same as f1, with the two operands of the 'and' exchanged.

define i16 @f2(half %x, i16 %y) {
; CHECK-LABEL: f2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovw %xmm0, %eax
; CHECK-NEXT:    andl %edi, %eax
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    retq
  %x.bits = bitcast half %x to i16
  %masked = and i16 %y, %x.bits
  ret i16 %masked
}

; Group: one FP operand and one constant operand, integer result.

define i16 @f3(half %x) {
; CHECK-LABEL: f3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovw %xmm0, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    retq
  %x.bits = bitcast half %x to i16
  %masked = and i16 %x.bits, 1
  ret i16 %masked
}

; Same shape as f3, with the constant written as the first operand
; (and a different mask value, 2).

define i16 @f4(half %x) {
; CHECK-LABEL: f4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovw %xmm0, %eax
; CHECK-NEXT:    andl $2, %eax
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    retq
  %x.bits = bitcast half %x to i16
  %masked = and i16 2, %x.bits
  ret i16 %masked
}

; Group: one FP operand and one integer operand, FP result.
; The masked i16 is cast back to half before being returned.

define half @f5(half %x, i16 %y) {
; CHECK-LABEL: f5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovw %edi, %xmm1
; CHECK-NEXT:    vandps %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x.bits = bitcast half %x to i16
  %masked = and i16 %x.bits, %y
  %res = bitcast i16 %masked to half
  ret half %res
}

; The next function (f6) repeats f5 with the operands of the logic op swapped.

define half @f6(half %x, i16 %y) {
; CHECK-LABEL: f6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovw %edi, %xmm1
; CHECK-NEXT:    vandps %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x.bits = bitcast half %x to i16
  %masked = and i16 %y, %x.bits
  %res = bitcast i16 %masked to half
  ret half %res
}

; Group: one FP operand and one constant operand, FP result.
; The integer mask 3 shows up in the expected output as a half immediate.

define half @f7(half %x) {
; CHECK-LABEL: f7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovsh {{.*#+}} xmm1 = [1.7881E-7,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    vandps %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x.bits = bitcast half %x to i16
  %masked = and i16 %x.bits, 3
  %res = bitcast i16 %masked to half
  ret half %res
}

; Same shape as f7, with the constant written as the first operand
; (and a different mask value, 4).

define half @f8(half %x) {
; CHECK-LABEL: f8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovsh {{.*#+}} xmm1 = [2.3842E-7,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    vandps %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x.bits = bitcast half %x to i16
  %masked = and i16 4, %x.bits
  %res = bitcast i16 %masked to half
  ret half %res
}

; Group: two FP operands, integer result.

define i16 @f9(half %x, half %y) {
; CHECK-LABEL: f9:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vandps %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vmovw %xmm0, %eax
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    retq
  %x.bits = bitcast half %x to i16
  %y.bits = bitcast half %y to i16
  %masked = and i16 %x.bits, %y.bits
  ret i16 %masked
}

; Group: two FP operands, FP result ('and', then 'or', then 'xor').

define half @f10(half %x, half %y) {
; CHECK-LABEL: f10:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vandps %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x.bits = bitcast half %x to i16
  %y.bits = bitcast half %y to i16
  %masked = and i16 %x.bits, %y.bits
  %res = bitcast i16 %masked to half
  ret half %res
}

define half @or(half %x, half %y) {
; CHECK-LABEL: or:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vorps %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x.bits = bitcast half %x to i16
  %y.bits = bitcast half %y to i16
  %merged = or i16 %x.bits, %y.bits
  %res = bitcast i16 %merged to half
  ret half %res
}

define half @xor(half %x, half %y) {
; CHECK-LABEL: xor:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x.bits = bitcast half %x to i16
  %y.bits = bitcast half %y to i16
  %toggled = xor i16 %x.bits, %y.bits
  %res = bitcast i16 %toggled to half
  ret half %res
}

; Variants of f7 (constant 3, FP result) using 'or' and 'xor' in place of 'and'.

define half @f7_or(half %x) {
; CHECK-LABEL: f7_or:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovsh {{.*#+}} xmm1 = [1.7881E-7,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    vorps %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x.bits = bitcast half %x to i16
  %merged = or i16 %x.bits, 3
  %res = bitcast i16 %merged to half
  ret half %res
}

define half @f7_xor(half %x) {
; CHECK-LABEL: f7_xor:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovsh {{.*#+}} xmm1 = [1.7881E-7,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    vxorps %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x.bits = bitcast half %x to i16
  %toggled = xor i16 %x.bits, 3
  %res = bitcast i16 %toggled to half
  ret half %res
}

; Extracting the sign bit is a special case that movmskps/movmskpd could
; handle, but when the bit is not being shifted down, a plain FP logic op
; is the cheaper choice.

; Keeps only bit 15 (32768) of the half; the mask appears as -0.0 in the
; expected output.
define half @movmsk(half %x) {
; CHECK-LABEL: movmsk:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovsh {{.*#+}} xmm1 = [-0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    vandps %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x.bits = bitcast half %x to i16
  %sign = and i16 %x.bits, 32768
  %res = bitcast i16 %sign to half
  ret half %res
}

; Clears bit 15 by masking with 32767; the mask is printed as NaN in the
; expected output.
define half @bitcast_fabs(half %x) {
; CHECK-LABEL: bitcast_fabs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastw {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; CHECK-NEXT:    vpand %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x.bits = bitcast half %x to i16
  %magnitude = and i16 %x.bits, 32767
  %res = bitcast i16 %magnitude to half
  ret half %res
}

; Flips bit 15 by xor-ing with 32768.
define half @bitcast_fneg(half %x) {
; CHECK-LABEL: bitcast_fneg:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastw {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x.bits = bitcast half %x to i16
  %flipped = xor i16 %x.bits, 32768
  %res = bitcast i16 %flipped to half
  ret half %res
}

; <8 x half> version of bitcast_fabs: every lane is masked with 32767.
define <8 x half> @bitcast_fabs_vec(<8 x half> %x) {
; CHECK-LABEL: bitcast_fabs_vec:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastw {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; CHECK-NEXT:    vpand %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x.bits = bitcast <8 x half> %x to <8 x i16>
  %magnitude = and <8 x i16> %x.bits, <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>
  %res = bitcast <8 x i16> %magnitude to <8 x half>
  ret <8 x half> %res
}

; <8 x half> version of bitcast_fneg: every lane is xor-ed with 32768.
define <8 x half> @bitcast_fneg_vec(<8 x half> %x) {
; CHECK-LABEL: bitcast_fneg_vec:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastw {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x.bits = bitcast <8 x half> %x to <8 x i16>
  %flipped = xor <8 x i16> %x.bits, <i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>
  %res = bitcast <8 x i16> %flipped to <8 x half>
  ret <8 x half> %res
}

; %y has bit 15 flipped through integer ops and is then added to %x; the
; expected output is a single scalar subtract.
define half @fadd_bitcast_fneg(half %x, half %y) {
; CHECK-LABEL: fadd_bitcast_fneg:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsubsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %y.bits = bitcast half %y to i16
  %flipped = xor i16 %y.bits, 32768
  %y.mod = bitcast i16 %flipped to half
  %sum = fadd half %x, %y.mod
  ret half %sum
}

; The xor mask here is 32767 (all bits except bit 15), not 32768, so the
; expected output keeps both the xor and the subtract.
; NOTE(review): the mask value looks deliberate given the expected output,
; despite the "fneg" in the name - confirm against fp-logic.ll.
define half @fsub_bitcast_fneg(half %x, half %y) {
; CHECK-LABEL: fsub_bitcast_fneg:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovsh {{.*#+}} xmm2 = [NaN,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    vxorps %xmm2, %xmm1, %xmm1
; CHECK-NEXT:    vsubsh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %y.bits = bitcast half %y to i16
  %inverted = xor i16 %y.bits, 32767
  %y.mod = bitcast i16 %inverted to half
  %diff = fsub half %x, %y.mod
  ret half %diff
}

; Sets bit 15 by or-ing with -32768.
define half @nabs(half %a) {
; CHECK-LABEL: nabs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastw {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT:    vpor %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a.bits = bitcast half %a to i16
  %with.sign = or i16 %a.bits, -32768
  %res = bitcast i16 %with.sign to half
  ret half %res
}

; <8 x half> version of nabs: every lane is or-ed with -32768.
define <8 x half> @nabsv8f16(<8 x half> %a) {
; CHECK-LABEL: nabsv8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastw {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; CHECK-NEXT:    vpor %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %a.bits = bitcast <8 x half> %a to <8 x i16>
  %with.sign = or <8 x i16> %a.bits, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
  %res = bitcast <8 x i16> %with.sign to <8 x half>
  ret <8 x half> %res
}

; Vector form of fadd_bitcast_fneg; the expected output is a single packed
; subtract.
define <8 x half> @fadd_bitcast_fneg_vec(<8 x half> %x, <8 x half> %y) {
; CHECK-LABEL: fadd_bitcast_fneg_vec:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsubph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %y.bits = bitcast <8 x half> %y to <8 x i16>
  %flipped = xor <8 x i16> %y.bits, <i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>
  %y.neg = bitcast <8 x i16> %flipped to <8 x half>
  %sum = fadd <8 x half> %x, %y.neg
  ret <8 x half> %sum
}

; As above, but lane 6 of the xor mask is undef; the expected output is
; still a single packed subtract.
define <8 x half> @fadd_bitcast_fneg_vec_undef_elts(<8 x half> %x, <8 x half> %y) {
; CHECK-LABEL: fadd_bitcast_fneg_vec_undef_elts:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsubph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %y.bits = bitcast <8 x half> %y to <8 x i16>
  %flipped = xor <8 x i16> %y.bits, <i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 undef, i16 32768>
  %y.neg = bitcast <8 x i16> %flipped to <8 x half>
  %sum = fadd <8 x half> %x, %y.neg
  ret <8 x half> %sum
}

; Subtracting the sign-flipped %y; the expected output is a single packed
; add.
define <8 x half> @fsub_bitcast_fneg_vec(<8 x half> %x, <8 x half> %y) {
; CHECK-LABEL: fsub_bitcast_fneg_vec:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vaddph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %y.bits = bitcast <8 x half> %y to <8 x i16>
  %flipped = xor <8 x i16> %y.bits, <i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768>
  %y.neg = bitcast <8 x i16> %flipped to <8 x half>
  %diff = fsub <8 x half> %x, %y.neg
  ret <8 x half> %diff
}

; As above, but lanes 0 and 7 of the xor mask are undef; the expected output
; is still a single packed add.
define <8 x half> @fsub_bitcast_fneg_vec_undef_elts(<8 x half> %x, <8 x half> %y) {
; CHECK-LABEL: fsub_bitcast_fneg_vec_undef_elts:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vaddph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %y.bits = bitcast <8 x half> %y to <8 x i16>
  %flipped = xor <8 x i16> %y.bits, <i16 undef, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 32768, i16 undef>
  %y.neg = bitcast <8 x i16> %flipped to <8 x half>
  %diff = fsub <8 x half> %x, %y.neg
  ret <8 x half> %diff
}

; Here the xor is done on <2 x i64> rather than <8 x i16>. The constant
; -9223231297218904064 is 0x8000800080008000, i.e. bit 15 of each 16-bit
; lane. The expected output keeps a separate xor followed by the add.
define <8 x half> @fadd_bitcast_fneg_vec_width(<8 x half> %x, <8 x half> %y) {
; CHECK-LABEL: fadd_bitcast_fneg_vec_width:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
; CHECK-NEXT:    vaddph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %y.wide = bitcast <8 x half> %y to <2 x i64>
  %flipped = xor <2 x i64> %y.wide, <i64 -9223231297218904064, i64 -9223231297218904064>
  %y.neg = bitcast <2 x i64> %flipped to <8 x half>
  %sum = fadd <8 x half> %x, %y.neg
  ret <8 x half> %sum
}

; Subtract counterpart of fadd_bitcast_fneg_vec_width: same <2 x i64> xor,
; and the expected output keeps a separate xor followed by the subtract.
define <8 x half> @fsub_bitcast_fneg_vec_width(<8 x half> %x, <8 x half> %y) {
; CHECK-LABEL: fsub_bitcast_fneg_vec_width:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm1
; CHECK-NEXT:    vsubph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %y.wide = bitcast <8 x half> %y to <2 x i64>
  %flipped = xor <2 x i64> %y.wide, <i64 -9223231297218904064, i64 -9223231297218904064>
  %y.neg = bitcast <2 x i64> %flipped to <8 x half>
  %diff = fsub <8 x half> %x, %y.neg
  ret <8 x half> %diff
}