1// WebAssemblyInstrSIMD.td - WebAssembly SIMD codegen support -*- tablegen -*-// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8/// 9/// \file 10/// WebAssembly SIMD operand code-gen constructs. 11/// 12//===----------------------------------------------------------------------===// 13 14// Instructions using the SIMD opcode prefix and requiring one of the SIMD 15// feature predicates. 16multiclass ABSTRACT_SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s, 17 list<dag> pattern_r, string asmstr_r, 18 string asmstr_s, bits<32> simdop, 19 list<Predicate> reqs> { 20 defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s, 21 !if(!ge(simdop, 0x100), 22 !or(0xfd0000, !and(0xffff, simdop)), 23 !or(0xfd00, !and(0xff, simdop)))>, 24 Requires<reqs>; 25} 26 27multiclass SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s, 28 list<dag> pattern_r, string asmstr_r = "", 29 string asmstr_s = "", bits<32> simdop = -1, 30 list<Predicate> reqs = []> { 31 defm "" : ABSTRACT_SIMD_I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, 32 asmstr_s, simdop, !listconcat([HasSIMD128], reqs)>; 33} 34 35multiclass RELAXED_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s, 36 list<dag> pattern_r, string asmstr_r = "", 37 string asmstr_s = "", bits<32> simdop = -1> { 38 defm "" : ABSTRACT_SIMD_I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, 39 asmstr_s, simdop, [HasRelaxedSIMD]>; 40} 41 42multiclass HALF_PRECISION_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s, 43 list<dag> pattern_r, string asmstr_r = "", 44 string asmstr_s = "", bits<32> simdop = -1> { 45 defm "" : ABSTRACT_SIMD_I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, 46 asmstr_s, simdop, [HasFP16]>; 47} 48 49 50defm "" : ARGUMENT<V128, v16i8>; 51defm "" : ARGUMENT<V128, v8i16>; 52defm "" : ARGUMENT<V128, v4i32>; 53defm "" : ARGUMENT<V128, v2i64>; 54defm "" : ARGUMENT<V128, v4f32>; 55defm "" : ARGUMENT<V128, v2f64>; 56defm "" : ARGUMENT<V128, v8f16>; 57 58// Constrained immediate argument types. Allow any value from the minimum signed 59// value to the maximum unsigned value for the lane size. 60foreach SIZE = [8, 16] in 61def ImmI#SIZE : ImmLeaf<i32, 62 // -2^(n-1) <= Imm < 2^n 63 "return -(1 << ("#SIZE#" - 1)) <= Imm && Imm < (1 << "#SIZE#");" 64>; 65foreach SIZE = [2, 4, 8, 16, 32] in 66def LaneIdx#SIZE : ImmLeaf<i32, "return 0 <= Imm && Imm < "#SIZE#";">; 67 68class Vec { 69 ValueType vt; 70 ValueType int_vt; 71 ValueType lane_vt; 72 WebAssemblyRegClass lane_rc; 73 int lane_bits; 74 ImmLeaf lane_idx; 75 SDPatternOperator lane_load; 76 PatFrag splat; 77 string prefix; 78 Vec split; 79} 80 81def I8x16 : Vec { 82 let vt = v16i8; 83 let int_vt = vt; 84 let lane_vt = i32; 85 let lane_rc = I32; 86 let lane_bits = 8; 87 let lane_idx = LaneIdx16; 88 let lane_load = extloadi8; 89 let splat = PatFrag<(ops node:$x), (v16i8 (splat_vector (i8 $x)))>; 90 let prefix = "i8x16"; 91} 92 93def I16x8 : Vec { 94 let vt = v8i16; 95 let int_vt = vt; 96 let lane_vt = i32; 97 let lane_rc = I32; 98 let lane_bits = 16; 99 let lane_idx = LaneIdx8; 100 let lane_load = extloadi16; 101 let splat = PatFrag<(ops node:$x), (v8i16 (splat_vector (i16 $x)))>; 102 let prefix = "i16x8"; 103 let split = I8x16; 104} 105 106def I32x4 : Vec { 107 let vt = v4i32; 108 let int_vt = vt; 109 let lane_vt = i32; 110 let lane_rc = I32; 111 let lane_bits = 32; 112 let lane_idx = LaneIdx4; 113 let lane_load = load; 114 let splat = PatFrag<(ops node:$x), (v4i32 (splat_vector (i32 $x)))>; 115 let prefix = "i32x4"; 116 let split = I16x8; 117} 118 119def I64x2 : Vec { 120 let vt = v2i64; 121 let int_vt = vt; 122 let lane_vt = i64; 123 let lane_rc = I64; 124 let lane_bits = 64; 125 let lane_idx = LaneIdx2; 126 let lane_load = load; 127 let splat = PatFrag<(ops node:$x), (v2i64 (splat_vector (i64 $x)))>; 128 let prefix = "i64x2"; 129 let split = I32x4; 130} 131 132def F32x4 : Vec { 133 let vt = v4f32; 134 let int_vt = v4i32; 135 let lane_vt = f32; 136 let lane_rc = F32; 137 let lane_bits = 32; 138 let lane_idx = LaneIdx4; 139 let lane_load = load; 140 let splat = PatFrag<(ops node:$x), (v4f32 (splat_vector (f32 $x)))>; 141 let prefix = "f32x4"; 142} 143 144def F64x2 : Vec { 145 let vt = v2f64; 146 let int_vt = v2i64; 147 let lane_vt = f64; 148 let lane_rc = F64; 149 let lane_bits = 64; 150 let lane_idx = LaneIdx2; 151 let lane_load = load; 152 let splat = PatFrag<(ops node:$x), (v2f64 (splat_vector (f64 $x)))>; 153 let prefix = "f64x2"; 154} 155 156def F16x8 : Vec { 157 let vt = v8f16; 158 let int_vt = v8i16; 159 let lane_vt = f32; 160 let lane_rc = F32; 161 let lane_bits = 16; 162 let lane_idx = LaneIdx8; 163 let lane_load = int_wasm_loadf16_f32; 164 let splat = PatFrag<(ops node:$x), (v8f16 (splat_vector (f16 $x)))>; 165 let prefix = "f16x8"; 166} 167 168// TODO: Remove StdVecs when the F16x8 works every where StdVecs is used. 169defvar StdVecs = [I8x16, I16x8, I32x4, I64x2, F32x4, F64x2]; 170defvar AllVecs = !listconcat(StdVecs, [F16x8]); 171defvar IntVecs = [I8x16, I16x8, I32x4, I64x2]; 172 173//===----------------------------------------------------------------------===// 174// Load and store 175//===----------------------------------------------------------------------===// 176 177// Load: v128.load 178let mayLoad = 1, UseNamedOperandTable = 1 in { 179defm LOAD_V128_A32 : 180 SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), 181 (outs), (ins P2Align:$p2align, offset32_op:$off), [], 182 "v128.load\t$dst, ${off}(${addr})$p2align", 183 "v128.load\t$off$p2align", 0>; 184defm LOAD_V128_A64 : 185 SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset64_op:$off, I64:$addr), 186 (outs), (ins P2Align:$p2align, offset64_op:$off), [], 187 "v128.load\t$dst, ${off}(${addr})$p2align", 188 "v128.load\t$off$p2align", 0>; 189} 190 191// Def load patterns from WebAssemblyInstrMemory.td for vector types 192foreach vec = AllVecs in { 193defm : LoadPat<vec.vt, load, "LOAD_V128">; 194} 195 196// v128.loadX_splat 197multiclass SIMDLoadSplat<int size, bits<32> simdop> { 198 let mayLoad = 1, UseNamedOperandTable = 1 in { 199 defm LOAD#size#_SPLAT_A32 : 200 SIMD_I<(outs V128:$dst), 201 (ins P2Align:$p2align, offset32_op:$off, I32:$addr), 202 (outs), 203 (ins P2Align:$p2align, offset32_op:$off), [], 204 "v128.load"#size#"_splat\t$dst, ${off}(${addr})$p2align", 205 "v128.load"#size#"_splat\t$off$p2align", simdop>; 206 defm LOAD#size#_SPLAT_A64 : 207 SIMD_I<(outs V128:$dst), 208 (ins P2Align:$p2align, offset64_op:$off, I64:$addr), 209 (outs), 210 (ins P2Align:$p2align, offset64_op:$off), [], 211 "v128.load"#size#"_splat\t$dst, ${off}(${addr})$p2align", 212 "v128.load"#size#"_splat\t$off$p2align", simdop>; 213 } 214} 215 216defm "" : SIMDLoadSplat<8, 7>; 217defm "" : SIMDLoadSplat<16, 8>; 218defm "" : SIMDLoadSplat<32, 9>; 219defm "" : SIMDLoadSplat<64, 10>; 220 221foreach vec = StdVecs in { 222 defvar inst = "LOAD"#vec.lane_bits#"_SPLAT"; 223 defm : LoadPat<vec.vt, 224 PatFrag<(ops node:$addr), (splat_vector (vec.lane_vt (vec.lane_load node:$addr)))>, 225 inst>; 226} 227 228// Load and extend 229multiclass SIMDLoadExtend<Vec vec, string loadPat, bits<32> simdop> { 230 defvar signed = vec.prefix#".load"#loadPat#"_s"; 231 defvar unsigned = vec.prefix#".load"#loadPat#"_u"; 232 let mayLoad = 1, UseNamedOperandTable = 1 in { 233 defm LOAD_EXTEND_S_#vec#_A32 : 234 SIMD_I<(outs V128:$dst), 235 (ins P2Align:$p2align, offset32_op:$off, I32:$addr), 236 (outs), (ins P2Align:$p2align, offset32_op:$off), [], 237 signed#"\t$dst, ${off}(${addr})$p2align", 238 signed#"\t$off$p2align", simdop>; 239 defm LOAD_EXTEND_U_#vec#_A32 : 240 SIMD_I<(outs V128:$dst), 241 (ins P2Align:$p2align, offset32_op:$off, I32:$addr), 242 (outs), (ins P2Align:$p2align, offset32_op:$off), [], 243 unsigned#"\t$dst, ${off}(${addr})$p2align", 244 unsigned#"\t$off$p2align", !add(simdop, 1)>; 245 defm LOAD_EXTEND_S_#vec#_A64 : 246 SIMD_I<(outs V128:$dst), 247 (ins P2Align:$p2align, offset64_op:$off, I64:$addr), 248 (outs), (ins P2Align:$p2align, offset64_op:$off), [], 249 signed#"\t$dst, ${off}(${addr})$p2align", 250 signed#"\t$off$p2align", simdop>; 251 defm LOAD_EXTEND_U_#vec#_A64 : 252 SIMD_I<(outs V128:$dst), 253 (ins P2Align:$p2align, offset64_op:$off, I64:$addr), 254 (outs), (ins P2Align:$p2align, offset64_op:$off), [], 255 unsigned#"\t$dst, ${off}(${addr})$p2align", 256 unsigned#"\t$off$p2align", !add(simdop, 1)>; 257 } 258} 259 260defm "" : SIMDLoadExtend<I16x8, "8x8", 1>; 261defm "" : SIMDLoadExtend<I32x4, "16x4", 3>; 262defm "" : SIMDLoadExtend<I64x2, "32x2", 5>; 263 264foreach vec = [I16x8, I32x4, I64x2] in 265foreach exts = [["sextloadvi", "_S"], 266 ["zextloadvi", "_U"], 267 ["extloadvi", "_U"]] in { 268defvar loadpat = !cast<PatFrag>(exts[0]#vec.split.lane_bits); 269defvar inst = "LOAD_EXTEND"#exts[1]#"_"#vec; 270defm : LoadPat<vec.vt, loadpat, inst>; 271} 272 273// Load lane into zero vector 274multiclass SIMDLoadZero<Vec vec, bits<32> simdop> { 275 defvar name = "v128.load"#vec.lane_bits#"_zero"; 276 let mayLoad = 1, UseNamedOperandTable = 1 in { 277 defm LOAD_ZERO_#vec.lane_bits#_A32 : 278 SIMD_I<(outs V128:$dst), 279 (ins P2Align:$p2align, offset32_op:$off, I32:$addr), 280 (outs), (ins P2Align:$p2align, offset32_op:$off), [], 281 name#"\t$dst, ${off}(${addr})$p2align", 282 name#"\t$off$p2align", simdop>; 283 defm LOAD_ZERO_#vec.lane_bits#_A64 : 284 SIMD_I<(outs V128:$dst), 285 (ins P2Align:$p2align, offset64_op:$off, I64:$addr), 286 (outs), (ins P2Align:$p2align, offset64_op:$off), [], 287 name#"\t$dst, ${off}(${addr})$p2align", 288 name#"\t$off$p2align", simdop>; 289 } // mayLoad = 1, UseNamedOperandTable = 1 290} 291 292defm "" : SIMDLoadZero<I32x4, 0x5c>; 293defm "" : SIMDLoadZero<I64x2, 0x5d>; 294 295// Use load_zero to load scalars into vectors as well where possible. 296// TODO: i16, and i8 scalars 297foreach vec = [I32x4, I64x2, F32x4, F64x2] in { 298 defvar inst = "LOAD_ZERO_"#vec.lane_bits; 299 defvar pat = PatFrag<(ops node:$addr), (scalar_to_vector (vec.lane_vt (load $addr)))>; 300 defm : LoadPat<vec.vt, pat, inst>; 301} 302 303// TODO: f32x4 and f64x2 as well 304foreach vec = [I32x4, I64x2] in { 305 defvar inst = "LOAD_ZERO_"#vec.lane_bits; 306 defvar pat = PatFrag<(ops node:$ptr), 307 (vector_insert (vec.splat (vec.lane_vt 0)), (vec.lane_vt (load $ptr)), 0)>; 308 defm : LoadPat<vec.vt, pat, inst>; 309} 310 311// Load lane 312multiclass SIMDLoadLane<bits<32> lane_bits, bits<32> simdop> { 313 defvar name = "v128.load"#lane_bits#"_lane"; 314 let mayLoad = 1, UseNamedOperandTable = 1 in { 315 defm LOAD_LANE_#lane_bits#_A32 : 316 SIMD_I<(outs V128:$dst), 317 (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx, 318 I32:$addr, V128:$vec), 319 (outs), (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx), 320 [], name#"\t$dst, ${off}(${addr})$p2align, $vec, $idx", 321 name#"\t$off$p2align, $idx", simdop>; 322 defm LOAD_LANE_#lane_bits#_A64 : 323 SIMD_I<(outs V128:$dst), 324 (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx, 325 I64:$addr, V128:$vec), 326 (outs), (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx), 327 [], name#"\t$dst, ${off}(${addr})$p2align, $vec, $idx", 328 name#"\t$off$p2align, $idx", simdop>; 329 } // mayLoad = 1, UseNamedOperandTable = 1 330} 331 332defm "" : SIMDLoadLane<8, 0x54>; 333defm "" : SIMDLoadLane<16, 0x55>; 334defm "" : SIMDLoadLane<32, 0x56>; 335defm "" : SIMDLoadLane<64, 0x57>; 336 337// Select loads with no constant offset. 338multiclass LoadLanePatNoOffset<Vec vec, SDPatternOperator kind> { 339 defvar load_lane_a32 = !cast<NI>("LOAD_LANE_"#vec.lane_bits#"_A32"); 340 defvar load_lane_a64 = !cast<NI>("LOAD_LANE_"#vec.lane_bits#"_A64"); 341 def : Pat<(vec.vt (kind (i32 I32:$addr), 342 (vec.vt V128:$vec), (i32 vec.lane_idx:$idx))), 343 (load_lane_a32 0, 0, imm:$idx, $addr, $vec)>, 344 Requires<[HasAddr32]>; 345 def : Pat<(vec.vt (kind (i64 I64:$addr), 346 (vec.vt V128:$vec), (i32 vec.lane_idx:$idx))), 347 (load_lane_a64 0, 0, imm:$idx, $addr, $vec)>, 348 Requires<[HasAddr64]>; 349} 350 351def load8_lane : 352 PatFrag<(ops node:$ptr, node:$vec, node:$idx), 353 (vector_insert $vec, (i32 (extloadi8 $ptr)), $idx)>; 354def load16_lane : 355 PatFrag<(ops node:$ptr, node:$vec, node:$idx), 356 (vector_insert $vec, (i32 (extloadi16 $ptr)), $idx)>; 357def load32_lane : 358 PatFrags<(ops node:$ptr, node:$vec, node:$idx), [ 359 (vector_insert $vec, (i32 (load $ptr)), $idx), 360 (vector_insert $vec, (f32 (load $ptr)), $idx) 361]>; 362def load64_lane : 363 PatFrags<(ops node:$ptr, node:$vec, node:$idx), [ 364 (vector_insert $vec, (i64 (load $ptr)), $idx), 365 (vector_insert $vec, (f64 (load $ptr)), $idx) 366]>; 367 368defm : LoadLanePatNoOffset<I8x16, load8_lane>; 369defm : LoadLanePatNoOffset<I16x8, load16_lane>; 370defm : LoadLanePatNoOffset<I32x4, load32_lane>; 371defm : LoadLanePatNoOffset<I64x2, load64_lane>; 372defm : LoadLanePatNoOffset<F32x4, load32_lane>; 373defm : LoadLanePatNoOffset<F64x2, load64_lane>; 374 375// TODO: Also support the other load patterns for load_lane once the instructions 376// are merged to the proposal. 377 378// Store: v128.store 379let mayStore = 1, UseNamedOperandTable = 1 in { 380defm STORE_V128_A32 : 381 SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, V128:$vec), 382 (outs), (ins P2Align:$p2align, offset32_op:$off), [], 383 "v128.store\t${off}(${addr})$p2align, $vec", 384 "v128.store\t$off$p2align", 11>; 385defm STORE_V128_A64 : 386 SIMD_I<(outs), (ins P2Align:$p2align, offset64_op:$off, I64:$addr, V128:$vec), 387 (outs), (ins P2Align:$p2align, offset64_op:$off), [], 388 "v128.store\t${off}(${addr})$p2align, $vec", 389 "v128.store\t$off$p2align", 11>; 390} 391 392// Def store patterns from WebAssemblyInstrMemory.td for vector types 393foreach vec = AllVecs in { 394defm : StorePat<vec.vt, store, "STORE_V128">; 395} 396 397// Store lane 398multiclass SIMDStoreLane<Vec vec, bits<32> simdop> { 399 defvar name = "v128.store"#vec.lane_bits#"_lane"; 400 let mayStore = 1, UseNamedOperandTable = 1 in { 401 defm STORE_LANE_#vec#_A32 : 402 SIMD_I<(outs), 403 (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx, 404 I32:$addr, V128:$vec), 405 (outs), (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx), 406 [], name#"\t${off}(${addr})$p2align, $vec, $idx", 407 name#"\t$off$p2align, $idx", simdop>; 408 defm STORE_LANE_#vec#_A64 : 409 SIMD_I<(outs), 410 (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx, 411 I64:$addr, V128:$vec), 412 (outs), (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx), 413 [], name#"\t${off}(${addr})$p2align, $vec, $idx", 414 name#"\t$off$p2align, $idx", simdop>; 415 } // mayStore = 1, UseNamedOperandTable = 1 416} 417 418defm "" : SIMDStoreLane<I8x16, 0x58>; 419defm "" : SIMDStoreLane<I16x8, 0x59>; 420defm "" : SIMDStoreLane<I32x4, 0x5a>; 421defm "" : SIMDStoreLane<I64x2, 0x5b>; 422 423multiclass StoreLanePat<Vec vec, SDPatternOperator kind> { 424 def : Pat<(kind (AddrOps32 offset32_op:$offset, I32:$addr), 425 (vec.vt V128:$vec), 426 (i32 vec.lane_idx:$idx)), 427 (!cast<NI>("STORE_LANE_"#vec#"_A32") 0, $offset, imm:$idx, $addr, $vec)>, 428 Requires<[HasAddr32]>; 429 def : Pat<(kind (AddrOps64 offset64_op:$offset, I64:$addr), 430 (vec.vt V128:$vec), 431 (i32 vec.lane_idx:$idx)), 432 (!cast<NI>("STORE_LANE_"#vec#"_A64") 0, $offset, imm:$idx, $addr, $vec)>, 433 Requires<[HasAddr64]>; 434} 435 436def store8_lane : 437 PatFrag<(ops node:$ptr, node:$vec, node:$idx), 438 (truncstorei8 (i32 (vector_extract $vec, $idx)), $ptr)>; 439def store16_lane : 440 PatFrag<(ops node:$ptr, node:$vec, node:$idx), 441 (truncstorei16 (i32 (vector_extract $vec, $idx)), $ptr)>; 442def store32_lane : 443 PatFrag<(ops node:$ptr, node:$vec, node:$idx), 444 (store (i32 (vector_extract $vec, $idx)), $ptr)>; 445def store64_lane : 446 PatFrag<(ops node:$ptr, node:$vec, node:$idx), 447 (store (i64 (vector_extract $vec, $idx)), $ptr)>; 448// TODO: floating point lanes as well 449 450let AddedComplexity = 1 in { 451defm : StoreLanePat<I8x16, store8_lane>; 452defm : StoreLanePat<I16x8, store16_lane>; 453defm : StoreLanePat<I32x4, store32_lane>; 454defm : StoreLanePat<I64x2, store64_lane>; 455} 456 457//===----------------------------------------------------------------------===// 458// Constructing SIMD values 459//===----------------------------------------------------------------------===// 460 461// Constant: v128.const 462multiclass ConstVec<Vec vec, dag ops, dag pat, string args> { 463 let isMoveImm = 1, isReMaterializable = 1 in 464 defm CONST_V128_#vec : SIMD_I<(outs V128:$dst), ops, (outs), ops, 465 [(set V128:$dst, (vec.vt pat))], 466 "v128.const\t$dst, "#args, 467 "v128.const\t"#args, 12>; 468} 469 470defm "" : ConstVec<I8x16, 471 (ins vec_i8imm_op:$i0, vec_i8imm_op:$i1, 472 vec_i8imm_op:$i2, vec_i8imm_op:$i3, 473 vec_i8imm_op:$i4, vec_i8imm_op:$i5, 474 vec_i8imm_op:$i6, vec_i8imm_op:$i7, 475 vec_i8imm_op:$i8, vec_i8imm_op:$i9, 476 vec_i8imm_op:$iA, vec_i8imm_op:$iB, 477 vec_i8imm_op:$iC, vec_i8imm_op:$iD, 478 vec_i8imm_op:$iE, vec_i8imm_op:$iF), 479 (build_vector ImmI8:$i0, ImmI8:$i1, ImmI8:$i2, ImmI8:$i3, 480 ImmI8:$i4, ImmI8:$i5, ImmI8:$i6, ImmI8:$i7, 481 ImmI8:$i8, ImmI8:$i9, ImmI8:$iA, ImmI8:$iB, 482 ImmI8:$iC, ImmI8:$iD, ImmI8:$iE, ImmI8:$iF), 483 !strconcat("$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7, ", 484 "$i8, $i9, $iA, $iB, $iC, $iD, $iE, $iF")>; 485defm "" : ConstVec<I16x8, 486 (ins vec_i16imm_op:$i0, vec_i16imm_op:$i1, 487 vec_i16imm_op:$i2, vec_i16imm_op:$i3, 488 vec_i16imm_op:$i4, vec_i16imm_op:$i5, 489 vec_i16imm_op:$i6, vec_i16imm_op:$i7), 490 (build_vector 491 ImmI16:$i0, ImmI16:$i1, ImmI16:$i2, ImmI16:$i3, 492 ImmI16:$i4, ImmI16:$i5, ImmI16:$i6, ImmI16:$i7), 493 "$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7">; 494let IsCanonical = 1 in 495defm "" : ConstVec<I32x4, 496 (ins vec_i32imm_op:$i0, vec_i32imm_op:$i1, 497 vec_i32imm_op:$i2, vec_i32imm_op:$i3), 498 (build_vector (i32 imm:$i0), (i32 imm:$i1), 499 (i32 imm:$i2), (i32 imm:$i3)), 500 "$i0, $i1, $i2, $i3">; 501defm "" : ConstVec<I64x2, 502 (ins vec_i64imm_op:$i0, vec_i64imm_op:$i1), 503 (build_vector (i64 imm:$i0), (i64 imm:$i1)), 504 "$i0, $i1">; 505defm "" : ConstVec<F32x4, 506 (ins f32imm_op:$i0, f32imm_op:$i1, 507 f32imm_op:$i2, f32imm_op:$i3), 508 (build_vector (f32 fpimm:$i0), (f32 fpimm:$i1), 509 (f32 fpimm:$i2), (f32 fpimm:$i3)), 510 "$i0, $i1, $i2, $i3">; 511defm "" : ConstVec<F64x2, 512 (ins f64imm_op:$i0, f64imm_op:$i1), 513 (build_vector (f64 fpimm:$i0), (f64 fpimm:$i1)), 514 "$i0, $i1">; 515 516// Match splat(x) -> const.v128(x, ..., x) 517foreach vec = StdVecs in { 518 defvar numEls = !div(vec.vt.Size, vec.lane_bits); 519 defvar isFloat = !or(!eq(vec.lane_vt, f32), !eq(vec.lane_vt, f64)); 520 defvar immKind = !if(isFloat, fpimm, imm); 521 def : Pat<(vec.splat (vec.lane_vt immKind:$x)), 522 !dag(!cast<NI>("CONST_V128_"#vec), 523 !listsplat((vec.lane_vt immKind:$x), numEls), 524 ?)>; 525} 526 527// Shuffle lanes: shuffle 528defm SHUFFLE : 529 SIMD_I<(outs V128:$dst), 530 (ins V128:$x, V128:$y, 531 vec_i8imm_op:$m0, vec_i8imm_op:$m1, 532 vec_i8imm_op:$m2, vec_i8imm_op:$m3, 533 vec_i8imm_op:$m4, vec_i8imm_op:$m5, 534 vec_i8imm_op:$m6, vec_i8imm_op:$m7, 535 vec_i8imm_op:$m8, vec_i8imm_op:$m9, 536 vec_i8imm_op:$mA, vec_i8imm_op:$mB, 537 vec_i8imm_op:$mC, vec_i8imm_op:$mD, 538 vec_i8imm_op:$mE, vec_i8imm_op:$mF), 539 (outs), 540 (ins 541 vec_i8imm_op:$m0, vec_i8imm_op:$m1, 542 vec_i8imm_op:$m2, vec_i8imm_op:$m3, 543 vec_i8imm_op:$m4, vec_i8imm_op:$m5, 544 vec_i8imm_op:$m6, vec_i8imm_op:$m7, 545 vec_i8imm_op:$m8, vec_i8imm_op:$m9, 546 vec_i8imm_op:$mA, vec_i8imm_op:$mB, 547 vec_i8imm_op:$mC, vec_i8imm_op:$mD, 548 vec_i8imm_op:$mE, vec_i8imm_op:$mF), 549 [], 550 "i8x16.shuffle\t$dst, $x, $y, "# 551 "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "# 552 "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF", 553 "i8x16.shuffle\t"# 554 "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "# 555 "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF", 556 13>; 557 558// Shuffles after custom lowering 559def wasm_shuffle_t : SDTypeProfile<1, 18, []>; 560def wasm_shuffle : SDNode<"WebAssemblyISD::SHUFFLE", wasm_shuffle_t>; 561foreach vec = StdVecs in { 562// The @llvm.wasm.shuffle intrinsic has immediate arguments that become TargetConstants. 563def : Pat<(vec.vt (wasm_shuffle (vec.vt V128:$x), (vec.vt V128:$y), 564 (i32 timm:$m0), (i32 timm:$m1), 565 (i32 timm:$m2), (i32 timm:$m3), 566 (i32 timm:$m4), (i32 timm:$m5), 567 (i32 timm:$m6), (i32 timm:$m7), 568 (i32 timm:$m8), (i32 timm:$m9), 569 (i32 timm:$mA), (i32 timm:$mB), 570 (i32 timm:$mC), (i32 timm:$mD), 571 (i32 timm:$mE), (i32 timm:$mF))), 572 (SHUFFLE $x, $y, 573 imm:$m0, imm:$m1, imm:$m2, imm:$m3, 574 imm:$m4, imm:$m5, imm:$m6, imm:$m7, 575 imm:$m8, imm:$m9, imm:$mA, imm:$mB, 576 imm:$mC, imm:$mD, imm:$mE, imm:$mF)>; 577// Normal shufflevector instructions may have normal constant arguemnts. 578def : Pat<(vec.vt (wasm_shuffle (vec.vt V128:$x), (vec.vt V128:$y), 579 (i32 LaneIdx32:$m0), (i32 LaneIdx32:$m1), 580 (i32 LaneIdx32:$m2), (i32 LaneIdx32:$m3), 581 (i32 LaneIdx32:$m4), (i32 LaneIdx32:$m5), 582 (i32 LaneIdx32:$m6), (i32 LaneIdx32:$m7), 583 (i32 LaneIdx32:$m8), (i32 LaneIdx32:$m9), 584 (i32 LaneIdx32:$mA), (i32 LaneIdx32:$mB), 585 (i32 LaneIdx32:$mC), (i32 LaneIdx32:$mD), 586 (i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF))), 587 (SHUFFLE $x, $y, 588 imm:$m0, imm:$m1, imm:$m2, imm:$m3, 589 imm:$m4, imm:$m5, imm:$m6, imm:$m7, 590 imm:$m8, imm:$m9, imm:$mA, imm:$mB, 591 imm:$mC, imm:$mD, imm:$mE, imm:$mF)>; 592} 593 594// Swizzle lanes: i8x16.swizzle 595def wasm_swizzle_t : SDTypeProfile<1, 2, []>; 596def wasm_swizzle : SDNode<"WebAssemblyISD::SWIZZLE", wasm_swizzle_t>; 597defm SWIZZLE : 598 SIMD_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins), 599 [(set (v16i8 V128:$dst), 600 (wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)))], 601 "i8x16.swizzle\t$dst, $src, $mask", "i8x16.swizzle", 14>; 602 603def : Pat<(int_wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)), 604 (SWIZZLE $src, $mask)>; 605 606multiclass Splat<Vec vec, bits<32> simdop> { 607 defm SPLAT_#vec : SIMD_I<(outs V128:$dst), (ins vec.lane_rc:$x), 608 (outs), (ins), 609 [(set (vec.vt V128:$dst), 610 (vec.splat vec.lane_rc:$x))], 611 vec.prefix#".splat\t$dst, $x", vec.prefix#".splat", 612 simdop>; 613} 614 615defm "" : Splat<I8x16, 15>; 616defm "" : Splat<I16x8, 16>; 617defm "" : Splat<I32x4, 17>; 618defm "" : Splat<I64x2, 18>; 619defm "" : Splat<F32x4, 19>; 620defm "" : Splat<F64x2, 20>; 621 622// Half values are not fully supported so an intrinsic is used instead of a 623// regular Splat pattern as above. 624defm SPLAT_F16x8 : 625 HALF_PRECISION_I<(outs V128:$dst), (ins F32:$x), 626 (outs), (ins), 627 [(set (v8f16 V128:$dst), (int_wasm_splat_f16x8 F32:$x))], 628 "f16x8.splat\t$dst, $x", "f16x8.splat", 0x120>; 629 630// scalar_to_vector leaves high lanes undefined, so can be a splat 631foreach vec = StdVecs in 632def : Pat<(vec.vt (scalar_to_vector (vec.lane_vt vec.lane_rc:$x))), 633 (!cast<Instruction>("SPLAT_"#vec) $x)>; 634 635//===----------------------------------------------------------------------===// 636// Accessing lanes 637//===----------------------------------------------------------------------===// 638 639// Extract lane as a scalar: extract_lane / extract_lane_s / extract_lane_u 640multiclass ExtractLane<Vec vec, bits<32> simdop, string suffix = ""> { 641 defm EXTRACT_LANE_#vec#suffix : 642 SIMD_I<(outs vec.lane_rc:$dst), (ins V128:$vec, vec_i8imm_op:$idx), 643 (outs), (ins vec_i8imm_op:$idx), [], 644 vec.prefix#".extract_lane"#suffix#"\t$dst, $vec, $idx", 645 vec.prefix#".extract_lane"#suffix#"\t$idx", simdop>; 646} 647 648defm "" : ExtractLane<I8x16, 21, "_s">; 649defm "" : ExtractLane<I8x16, 22, "_u">; 650defm "" : ExtractLane<I16x8, 24, "_s">; 651defm "" : ExtractLane<I16x8, 25, "_u">; 652defm "" : ExtractLane<I32x4, 27>; 653defm "" : ExtractLane<I64x2, 29>; 654defm "" : ExtractLane<F32x4, 31>; 655defm "" : ExtractLane<F64x2, 33>; 656 657def : Pat<(vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), 658 (EXTRACT_LANE_I8x16_u $vec, imm:$idx)>; 659def : Pat<(vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), 660 (EXTRACT_LANE_I16x8_u $vec, imm:$idx)>; 661def : Pat<(vector_extract (v4i32 V128:$vec), (i32 LaneIdx4:$idx)), 662 (EXTRACT_LANE_I32x4 $vec, imm:$idx)>; 663def : Pat<(vector_extract (v4f32 V128:$vec), (i32 LaneIdx4:$idx)), 664 (EXTRACT_LANE_F32x4 $vec, imm:$idx)>; 665def : Pat<(vector_extract (v2i64 V128:$vec), (i32 LaneIdx2:$idx)), 666 (EXTRACT_LANE_I64x2 $vec, imm:$idx)>; 667def : Pat<(vector_extract (v2f64 V128:$vec), (i32 LaneIdx2:$idx)), 668 (EXTRACT_LANE_F64x2 $vec, imm:$idx)>; 669 670def : Pat< 671 (sext_inreg (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), i8), 672 (EXTRACT_LANE_I8x16_s $vec, imm:$idx)>; 673def : Pat< 674 (and (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), (i32 0xff)), 675 (EXTRACT_LANE_I8x16_u $vec, imm:$idx)>; 676def : Pat< 677 (sext_inreg (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), i16), 678 (EXTRACT_LANE_I16x8_s $vec, imm:$idx)>; 679def : Pat< 680 (and (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), (i32 0xffff)), 681 (EXTRACT_LANE_I16x8_u $vec, imm:$idx)>; 682 683defm EXTRACT_LANE_F16x8 : 684 HALF_PRECISION_I<(outs F32:$dst), (ins V128:$vec, vec_i8imm_op:$idx), 685 (outs), (ins vec_i8imm_op:$idx), 686 [(set (f32 F32:$dst), (int_wasm_extract_lane_f16x8 687 (v8f16 V128:$vec), (i32 LaneIdx8:$idx)))], 688 "f16x8.extract_lane\t$dst, $vec, $idx", 689 "f16x8.extract_lane\t$idx", 0x121>; 690 691// Replace lane value: replace_lane 692multiclass ReplaceLane<Vec vec, bits<32> simdop> { 693 defm REPLACE_LANE_#vec : 694 SIMD_I<(outs V128:$dst), (ins V128:$vec, vec_i8imm_op:$idx, vec.lane_rc:$x), 695 (outs), (ins vec_i8imm_op:$idx), 696 [(set V128:$dst, (vector_insert 697 (vec.vt V128:$vec), 698 (vec.lane_vt vec.lane_rc:$x), 699 (i32 vec.lane_idx:$idx)))], 700 vec.prefix#".replace_lane\t$dst, $vec, $idx, $x", 701 vec.prefix#".replace_lane\t$idx", simdop>; 702} 703 704defm "" : ReplaceLane<I8x16, 23>; 705defm "" : ReplaceLane<I16x8, 26>; 706defm "" : ReplaceLane<I32x4, 28>; 707defm "" : ReplaceLane<I64x2, 30>; 708defm "" : ReplaceLane<F32x4, 32>; 709defm "" : ReplaceLane<F64x2, 34>; 710 711// For now use an intrinsic for f16x8.replace_lane instead of ReplaceLane above 712// since LLVM IR generated with half type arguments is not well supported and 713// creates conversions from f16->f32. 714defm REPLACE_LANE_F16x8 : 715 HALF_PRECISION_I<(outs V128:$dst), (ins V128:$vec, vec_i8imm_op:$idx, F32:$x), 716 (outs), (ins vec_i8imm_op:$idx), 717 [(set (v8f16 V128:$dst), (int_wasm_replace_lane_f16x8 718 (v8f16 V128:$vec), 719 (i32 LaneIdx8:$idx), 720 (f32 F32:$x)))], 721 "f16x8.replace_lane\t$dst, $vec, $idx, $x", 722 "f16x8.replace_lane\t$idx", 0x122>; 723 724// Lower undef lane indices to zero 725def : Pat<(vector_insert (v16i8 V128:$vec), I32:$x, undef), 726 (REPLACE_LANE_I8x16 $vec, 0, $x)>; 727def : Pat<(vector_insert (v8i16 V128:$vec), I32:$x, undef), 728 (REPLACE_LANE_I16x8 $vec, 0, $x)>; 729def : Pat<(vector_insert (v4i32 V128:$vec), I32:$x, undef), 730 (REPLACE_LANE_I32x4 $vec, 0, $x)>; 731def : Pat<(vector_insert (v2i64 V128:$vec), I64:$x, undef), 732 (REPLACE_LANE_I64x2 $vec, 0, $x)>; 733def : Pat<(vector_insert (v4f32 V128:$vec), F32:$x, undef), 734 (REPLACE_LANE_F32x4 $vec, 0, $x)>; 735def : Pat<(vector_insert (v2f64 V128:$vec), F64:$x, undef), 736 (REPLACE_LANE_F64x2 $vec, 0, $x)>; 737 738//===----------------------------------------------------------------------===// 739// Comparisons 740//===----------------------------------------------------------------------===// 741 742multiclass SIMDCondition<Vec vec, string name, CondCode cond, bits<32> simdop, 743 list<Predicate> reqs = []> { 744 defm _#vec : 745 SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins), 746 [(set (vec.int_vt V128:$dst), 747 (setcc (vec.vt V128:$lhs), (vec.vt V128:$rhs), cond))], 748 vec.prefix#"."#name#"\t$dst, $lhs, $rhs", 749 vec.prefix#"."#name, simdop, reqs>; 750} 751 752multiclass HalfPrecisionCondition<Vec vec, string name, CondCode cond, 753 bits<32> simdop> { 754 defm "" : SIMDCondition<vec, name, cond, simdop, [HasFP16]>; 755} 756 757multiclass SIMDConditionInt<string name, CondCode cond, bits<32> baseInst> { 758 defm "" : SIMDCondition<I8x16, name, cond, baseInst>; 759 defm "" : SIMDCondition<I16x8, name, cond, !add(baseInst, 10)>; 760 defm "" : SIMDCondition<I32x4, name, cond, !add(baseInst, 20)>; 761} 762 763multiclass SIMDConditionFP<string name, CondCode cond, bits<32> baseInst> { 764 defm "" : SIMDCondition<F32x4, name, cond, baseInst>; 765 defm "" : SIMDCondition<F64x2, name, cond, !add(baseInst, 6)>; 766 defm "" : HalfPrecisionCondition<F16x8, name, cond, !add(baseInst, 246)>; 767} 768 769// Equality: eq 770let isCommutable = 1 in { 771defm EQ : SIMDConditionInt<"eq", SETEQ, 35>; 772defm EQ : SIMDCondition<I64x2, "eq", SETEQ, 214>; 773defm EQ : SIMDConditionFP<"eq", SETOEQ, 65>; 774} // isCommutable = 1 775 776// Non-equality: ne 777let isCommutable = 1 in { 778defm NE : SIMDConditionInt<"ne", SETNE, 36>; 779defm NE : SIMDCondition<I64x2, "ne", SETNE, 215>; 780defm NE : SIMDConditionFP<"ne", SETUNE, 66>; 781} // isCommutable = 1 782 783// Less than: lt_s / lt_u / lt 784defm LT_S : SIMDConditionInt<"lt_s", SETLT, 37>; 785defm LT_S : SIMDCondition<I64x2, "lt_s", SETLT, 216>; 786defm LT_U : SIMDConditionInt<"lt_u", SETULT, 38>; 787defm LT : SIMDConditionFP<"lt", SETOLT, 67>; 788 789// Greater than: gt_s / gt_u / gt 790defm GT_S : SIMDConditionInt<"gt_s", SETGT, 39>; 791defm GT_S : SIMDCondition<I64x2, "gt_s", SETGT, 217>; 792defm GT_U : SIMDConditionInt<"gt_u", SETUGT, 40>; 793defm GT : SIMDConditionFP<"gt", SETOGT, 68>; 794 795// Less than or equal: le_s / le_u / le 796defm LE_S : SIMDConditionInt<"le_s", SETLE, 41>; 797defm LE_S : SIMDCondition<I64x2, "le_s", SETLE, 218>; 798defm LE_U : SIMDConditionInt<"le_u", SETULE, 42>; 799defm LE : SIMDConditionFP<"le", SETOLE, 69>; 800 801// Greater than or equal: ge_s / ge_u / ge 802defm GE_S : SIMDConditionInt<"ge_s", SETGE, 43>; 803defm GE_S : SIMDCondition<I64x2, "ge_s", SETGE, 219>; 804defm GE_U : SIMDConditionInt<"ge_u", SETUGE, 44>; 805defm GE : SIMDConditionFP<"ge", SETOGE, 70>; 806 807// Lower float comparisons that don't care about NaN to standard WebAssembly 808// float comparisons. These instructions are generated with nnan and in the 809// target-independent expansion of unordered comparisons and ordered ne. 810foreach nodes = [[seteq, EQ_F32x4], [setne, NE_F32x4], [setlt, LT_F32x4], 811 [setgt, GT_F32x4], [setle, LE_F32x4], [setge, GE_F32x4]] in 812def : Pat<(v4i32 (nodes[0] (v4f32 V128:$lhs), (v4f32 V128:$rhs))), 813 (nodes[1] $lhs, $rhs)>; 814 815foreach nodes = [[seteq, EQ_F64x2], [setne, NE_F64x2], [setlt, LT_F64x2], 816 [setgt, GT_F64x2], [setle, LE_F64x2], [setge, GE_F64x2]] in 817def : Pat<(v2i64 (nodes[0] (v2f64 V128:$lhs), (v2f64 V128:$rhs))), 818 (nodes[1] $lhs, $rhs)>; 819 820//===----------------------------------------------------------------------===// 821// Bitwise operations 822//===----------------------------------------------------------------------===// 823 824multiclass SIMDBinary<Vec vec, SDPatternOperator node, string name, 825 bits<32> simdop, list<Predicate> reqs = []> { 826 defm _#vec : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), 827 (outs), (ins), 828 [(set (vec.vt V128:$dst), 829 (node (vec.vt V128:$lhs), (vec.vt V128:$rhs)))], 830 vec.prefix#"."#name#"\t$dst, $lhs, $rhs", 831 vec.prefix#"."#name, simdop, reqs>; 832} 833 834multiclass HalfPrecisionBinary<Vec vec, SDPatternOperator node, string name, 835 bits<32> simdop> { 836 defm "" : SIMDBinary<vec, node, name, simdop, [HasFP16]>; 837} 838 839multiclass SIMDBitwise<SDPatternOperator node, string name, bits<32> simdop, 840 bit commutable = false> { 841 let isCommutable = commutable in 842 defm "" : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), 843 (outs), (ins), [], 844 "v128."#name#"\t$dst, $lhs, $rhs", "v128."#name, simdop>; 845 foreach vec = IntVecs in 846 def : Pat<(node (vec.vt V128:$lhs), (vec.vt V128:$rhs)), 847 (!cast<NI>(NAME) $lhs, $rhs)>; 848} 849 850multiclass SIMDUnary<Vec vec, SDPatternOperator node, string name, 851 bits<32> simdop, list<Predicate> reqs = []> { 852 defm _#vec : SIMD_I<(outs V128:$dst), (ins V128:$v), (outs), (ins), 853 [(set (vec.vt V128:$dst), 854 (vec.vt (node (vec.vt V128:$v))))], 855 vec.prefix#"."#name#"\t$dst, $v", 856 vec.prefix#"."#name, simdop, reqs>; 857} 858 859multiclass HalfPrecisionUnary<Vec vec, SDPatternOperator node, string name, 860 bits<32> simdop> { 861 defm "" : SIMDUnary<vec, node, name, simdop, [HasFP16]>; 862} 863 864// Bitwise logic: v128.not 865defm NOT : SIMD_I<(outs V128:$dst), (ins V128:$v), (outs), (ins), [], 866 "v128.not\t$dst, $v", "v128.not", 77>; 867foreach vec = IntVecs in 868def : Pat<(vnot (vec.vt V128:$v)), (NOT $v)>; 869 870// Bitwise logic: v128.and / v128.or / v128.xor 871defm AND : SIMDBitwise<and, "and", 78, true>; 872defm OR : SIMDBitwise<or, "or", 80, true>; 873defm XOR : SIMDBitwise<xor, "xor", 81, true>; 874 875// Bitwise logic: v128.andnot 876def andnot : PatFrag<(ops node:$left, node:$right), (and $left, (vnot $right))>; 877defm ANDNOT : SIMDBitwise<andnot, "andnot", 79>; 878 879// Bitwise select: v128.bitselect 880defm BITSELECT : 881 SIMD_I<(outs V128:$dst), (ins V128:$v1, V128:$v2, V128:$c), (outs), (ins), [], 882 "v128.bitselect\t$dst, $v1, $v2, $c", "v128.bitselect", 82>; 883 884foreach vec = StdVecs in 885def : Pat<(vec.vt (int_wasm_bitselect 886 (vec.vt V128:$v1), (vec.vt V128:$v2), (vec.vt V128:$c))), 887 (BITSELECT $v1, $v2, $c)>; 888 889// Bitselect is equivalent to (c & v1) | (~c & v2) 890foreach vec = IntVecs in 891def : Pat<(vec.vt (or (and (vec.vt V128:$c), (vec.vt V128:$v1)), 892 (and (vnot V128:$c), (vec.vt V128:$v2)))), 893 (BITSELECT $v1, $v2, $c)>; 894 895// Bitselect is also equivalent to ((v1 ^ v2) & c) ^ v2 896foreach vec = IntVecs in 897def : Pat<(vec.vt (xor (and (xor (vec.vt V128:$v1), (vec.vt V128:$v2)), 898 (vec.vt V128:$c)), 899 (vec.vt V128:$v2))), 900 (BITSELECT $v1, $v2, $c)>; 901 902// Same pattern with `c` negated so `a` and `b` get swapped. 903foreach vec = IntVecs in 904def : Pat<(vec.vt (xor (and (xor (vec.vt V128:$v1), (vec.vt V128:$v2)), 905 (vnot (vec.vt V128:$c))), 906 (vec.vt V128:$v2))), 907 (BITSELECT $v2, $v1, $c)>; 908 909// Also implement vselect in terms of bitselect 910foreach vec = StdVecs in 911def : Pat<(vec.vt (vselect 912 (vec.int_vt V128:$c), (vec.vt V128:$v1), (vec.vt V128:$v2))), 913 (BITSELECT $v1, $v2, $c)>; 914 915// MVP select on v128 values 916defm SELECT_V128 : 917 I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs, I32:$cond), (outs), (ins), [], 918 "v128.select\t$dst, $lhs, $rhs, $cond", "v128.select", 0x1b>; 919 920foreach vec = StdVecs in { 921def : Pat<(select I32:$cond, (vec.vt V128:$lhs), (vec.vt V128:$rhs)), 922 (SELECT_V128 $lhs, $rhs, $cond)>; 923 924// ISD::SELECT requires its operand to conform to getBooleanContents, but 925// WebAssembly's select interprets any non-zero value as true, so we can fold 926// a setne with 0 into a select. 927def : Pat<(select 928 (i32 (setne I32:$cond, 0)), (vec.vt V128:$lhs), (vec.vt V128:$rhs)), 929 (SELECT_V128 $lhs, $rhs, $cond)>; 930 931// And again, this time with seteq instead of setne and the arms reversed. 932def : Pat<(select 933 (i32 (seteq I32:$cond, 0)), (vec.vt V128:$lhs), (vec.vt V128:$rhs)), 934 (SELECT_V128 $rhs, $lhs, $cond)>; 935} // foreach vec 936 937//===----------------------------------------------------------------------===// 938// Integer unary arithmetic 939//===----------------------------------------------------------------------===// 940 941multiclass SIMDUnaryInt<SDPatternOperator node, string name, bits<32> baseInst> { 942 defm "" : SIMDUnary<I8x16, node, name, baseInst>; 943 defm "" : SIMDUnary<I16x8, node, name, !add(baseInst, 32)>; 944 defm "" : SIMDUnary<I32x4, node, name, !add(baseInst, 64)>; 945 defm "" : SIMDUnary<I64x2, node, name, !add(baseInst, 96)>; 946} 947 948// Integer vector negation 949def ivneg : PatFrag<(ops node:$in), (sub immAllZerosV, $in)>; 950 951// Integer absolute value: abs 952defm ABS : SIMDUnaryInt<abs, "abs", 96>; 953 954// Integer negation: neg 955defm NEG : SIMDUnaryInt<ivneg, "neg", 97>; 956 957// Population count: popcnt 958defm POPCNT : SIMDUnary<I8x16, ctpop, "popcnt", 0x62>; 959 960// Any lane true: any_true 961defm ANYTRUE : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins), [], 962 "v128.any_true\t$dst, $vec", "v128.any_true", 0x53>; 963 964foreach vec = IntVecs in 965def : Pat<(int_wasm_anytrue (vec.vt V128:$vec)), (ANYTRUE V128:$vec)>; 966 967// All lanes true: all_true 968multiclass SIMDAllTrue<Vec vec, bits<32> simdop> { 969 defm ALLTRUE_#vec : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins), 970 [(set I32:$dst, 971 (i32 (int_wasm_alltrue (vec.vt V128:$vec))))], 972 vec.prefix#".all_true\t$dst, $vec", 973 vec.prefix#".all_true", simdop>; 974} 975 976defm "" : SIMDAllTrue<I8x16, 0x63>; 977defm "" : SIMDAllTrue<I16x8, 0x83>; 978defm "" : SIMDAllTrue<I32x4, 0xa3>; 979defm "" : SIMDAllTrue<I64x2, 0xc3>; 980 981// Reductions already return 0 or 1, so and 1, setne 0, and seteq 1 982// can be folded out 983foreach reduction = 984 [["int_wasm_anytrue", "ANYTRUE", "I8x16"], 985 ["int_wasm_anytrue", "ANYTRUE", "I16x8"], 986 ["int_wasm_anytrue", "ANYTRUE", "I32x4"], 987 ["int_wasm_anytrue", "ANYTRUE", "I64x2"], 988 ["int_wasm_alltrue", "ALLTRUE_I8x16", "I8x16"], 989 ["int_wasm_alltrue", "ALLTRUE_I16x8", "I16x8"], 990 ["int_wasm_alltrue", "ALLTRUE_I32x4", "I32x4"], 991 ["int_wasm_alltrue", "ALLTRUE_I64x2", "I64x2"]] in { 992defvar intrinsic = !cast<Intrinsic>(reduction[0]); 993defvar inst = !cast<NI>(reduction[1]); 994defvar vec = !cast<Vec>(reduction[2]); 995def : Pat<(i32 (and (i32 (intrinsic (vec.vt V128:$x))), (i32 1))), (inst $x)>; 996def : Pat<(i32 (setne (i32 (intrinsic (vec.vt V128:$x))), (i32 0))), (inst $x)>; 997def : Pat<(i32 (seteq (i32 (intrinsic (vec.vt V128:$x))), (i32 1))), (inst $x)>; 998} 999 1000multiclass SIMDBitmask<Vec vec, bits<32> simdop> { 1001 defm _#vec : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins), 1002 [(set I32:$dst, 1003 (i32 (int_wasm_bitmask (vec.vt V128:$vec))))], 1004 vec.prefix#".bitmask\t$dst, $vec", vec.prefix#".bitmask", 1005 simdop>; 1006} 1007 1008defm BITMASK : SIMDBitmask<I8x16, 100>; 1009defm BITMASK : SIMDBitmask<I16x8, 132>; 1010defm BITMASK : SIMDBitmask<I32x4, 164>; 1011defm BITMASK : SIMDBitmask<I64x2, 196>; 1012 1013//===----------------------------------------------------------------------===// 1014// Bit shifts 1015//===----------------------------------------------------------------------===// 1016 1017multiclass SIMDShift<Vec vec, SDNode node, string name, bits<32> simdop> { 1018 defm _#vec : SIMD_I<(outs V128:$dst), (ins V128:$vec, I32:$x), (outs), (ins), 1019 [(set (vec.vt V128:$dst), (node V128:$vec, I32:$x))], 1020 vec.prefix#"."#name#"\t$dst, $vec, $x", 1021 vec.prefix#"."#name, simdop>; 1022} 1023 1024multiclass SIMDShiftInt<SDNode node, string name, bits<32> baseInst> { 1025 defm "" : SIMDShift<I8x16, node, name, baseInst>; 1026 defm "" : SIMDShift<I16x8, node, name, !add(baseInst, 32)>; 1027 defm "" : SIMDShift<I32x4, node, name, !add(baseInst, 64)>; 1028 defm "" : SIMDShift<I64x2, node, name, !add(baseInst, 96)>; 1029} 1030 1031// WebAssembly SIMD shifts are nonstandard in that the shift amount is 1032// an i32 rather than a vector, so they need custom nodes. 1033def wasm_shift_t : 1034 SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>; 1035def wasm_shl : SDNode<"WebAssemblyISD::VEC_SHL", wasm_shift_t>; 1036def wasm_shr_s : SDNode<"WebAssemblyISD::VEC_SHR_S", wasm_shift_t>; 1037def wasm_shr_u : SDNode<"WebAssemblyISD::VEC_SHR_U", wasm_shift_t>; 1038 1039// Left shift by scalar: shl 1040defm SHL : SIMDShiftInt<wasm_shl, "shl", 107>; 1041 1042// Right shift by scalar: shr_s / shr_u 1043defm SHR_S : SIMDShiftInt<wasm_shr_s, "shr_s", 108>; 1044defm SHR_U : SIMDShiftInt<wasm_shr_u, "shr_u", 109>; 1045 1046// Optimize away an explicit mask on a shift count. 1047def : Pat<(wasm_shl (v16i8 V128:$lhs), (and I32:$rhs, 7)), 1048 (SHL_I8x16 V128:$lhs, I32:$rhs)>; 1049def : Pat<(wasm_shr_s (v16i8 V128:$lhs), (and I32:$rhs, 7)), 1050 (SHR_S_I8x16 V128:$lhs, I32:$rhs)>; 1051def : Pat<(wasm_shr_u (v16i8 V128:$lhs), (and I32:$rhs, 7)), 1052 (SHR_U_I8x16 V128:$lhs, I32:$rhs)>; 1053 1054def : Pat<(wasm_shl (v8i16 V128:$lhs), (and I32:$rhs, 15)), 1055 (SHL_I16x8 V128:$lhs, I32:$rhs)>; 1056def : Pat<(wasm_shr_s (v8i16 V128:$lhs), (and I32:$rhs, 15)), 1057 (SHR_S_I16x8 V128:$lhs, I32:$rhs)>; 1058def : Pat<(wasm_shr_u (v8i16 V128:$lhs), (and I32:$rhs, 15)), 1059 (SHR_U_I16x8 V128:$lhs, I32:$rhs)>; 1060 1061def : Pat<(wasm_shl (v4i32 V128:$lhs), (and I32:$rhs, 31)), 1062 (SHL_I32x4 V128:$lhs, I32:$rhs)>; 1063def : Pat<(wasm_shr_s (v4i32 V128:$lhs), (and I32:$rhs, 31)), 1064 (SHR_S_I32x4 V128:$lhs, I32:$rhs)>; 1065def : Pat<(wasm_shr_u (v4i32 V128:$lhs), (and I32:$rhs, 31)), 1066 (SHR_U_I32x4 V128:$lhs, I32:$rhs)>; 1067 1068def : Pat<(wasm_shl (v2i64 V128:$lhs), (and I32:$rhs, 63)), 1069 (SHL_I64x2 V128:$lhs, I32:$rhs)>; 1070def : Pat<(wasm_shr_s (v2i64 V128:$lhs), (and I32:$rhs, 63)), 1071 (SHR_S_I64x2 V128:$lhs, I32:$rhs)>; 1072def : Pat<(wasm_shr_u (v2i64 V128:$lhs), (and I32:$rhs, 63)), 1073 (SHR_U_I64x2 V128:$lhs, I32:$rhs)>; 1074def : Pat<(wasm_shl (v2i64 V128:$lhs), (trunc (and I64:$rhs, 63))), 1075 (SHL_I64x2 V128:$lhs, (I32_WRAP_I64 I64:$rhs))>; 1076def : Pat<(wasm_shr_s (v2i64 V128:$lhs), (trunc (and I64:$rhs, 63))), 1077 (SHR_S_I64x2 V128:$lhs, (I32_WRAP_I64 I64:$rhs))>; 1078def : Pat<(wasm_shr_u (v2i64 V128:$lhs), (trunc (and I64:$rhs, 63))), 1079 (SHR_U_I64x2 V128:$lhs, (I32_WRAP_I64 I64:$rhs))>; 1080 1081//===----------------------------------------------------------------------===// 1082// Integer binary arithmetic 1083//===----------------------------------------------------------------------===// 1084 1085multiclass SIMDBinaryIntNoI8x16<SDPatternOperator node, string name, bits<32> baseInst> { 1086 defm "" : SIMDBinary<I16x8, node, name, !add(baseInst, 32)>; 1087 defm "" : SIMDBinary<I32x4, node, name, !add(baseInst, 64)>; 1088 defm "" : SIMDBinary<I64x2, node, name, !add(baseInst, 96)>; 1089} 1090 1091multiclass SIMDBinaryIntSmall<SDPatternOperator node, string name, bits<32> baseInst> { 1092 defm "" : SIMDBinary<I8x16, node, name, baseInst>; 1093 defm "" : SIMDBinary<I16x8, node, name, !add(baseInst, 32)>; 1094} 1095 1096multiclass SIMDBinaryIntNoI64x2<SDPatternOperator node, string name, bits<32> baseInst> { 1097 defm "" : SIMDBinaryIntSmall<node, name, baseInst>; 1098 defm "" : SIMDBinary<I32x4, node, name, !add(baseInst, 64)>; 1099} 1100 1101multiclass SIMDBinaryInt<SDPatternOperator node, string name, bits<32> baseInst> { 1102 defm "" : SIMDBinaryIntNoI64x2<node, name, baseInst>; 1103 defm "" : SIMDBinary<I64x2, node, name, !add(baseInst, 96)>; 1104} 1105 1106// Integer addition: add / add_sat_s / add_sat_u 1107let isCommutable = 1 in { 1108defm ADD : SIMDBinaryInt<add, "add", 110>; 1109defm ADD_SAT_S : SIMDBinaryIntSmall<saddsat, "add_sat_s", 111>; 1110defm ADD_SAT_U : SIMDBinaryIntSmall<uaddsat, "add_sat_u", 112>; 1111} // isCommutable = 1 1112 1113// Integer subtraction: sub / sub_sat_s / sub_sat_u 1114defm SUB : SIMDBinaryInt<sub, "sub", 113>; 1115defm SUB_SAT_S : SIMDBinaryIntSmall<ssubsat, "sub_sat_s", 114>; 1116defm SUB_SAT_U : SIMDBinaryIntSmall<usubsat, "sub_sat_u", 115>; 1117 1118// Integer multiplication: mul 1119let isCommutable = 1 in 1120defm MUL : SIMDBinaryIntNoI8x16<mul, "mul", 117>; 1121 1122// Integer min_s / min_u / max_s / max_u 1123let isCommutable = 1 in { 1124defm MIN_S : SIMDBinaryIntNoI64x2<smin, "min_s", 118>; 1125defm MIN_U : SIMDBinaryIntNoI64x2<umin, "min_u", 119>; 1126defm MAX_S : SIMDBinaryIntNoI64x2<smax, "max_s", 120>; 1127defm MAX_U : SIMDBinaryIntNoI64x2<umax, "max_u", 121>; 1128} // isCommutable = 1 1129 1130// Integer unsigned rounding average: avgr_u 1131let isCommutable = 1 in { 1132defm AVGR_U : SIMDBinary<I8x16, int_wasm_avgr_unsigned, "avgr_u", 123>; 1133defm AVGR_U : SIMDBinary<I16x8, int_wasm_avgr_unsigned, "avgr_u", 155>; 1134} 1135 1136def add_nuw : PatFrag<(ops node:$lhs, node:$rhs), (add $lhs, $rhs), 1137 "return N->getFlags().hasNoUnsignedWrap();">; 1138 1139foreach vec = [I8x16, I16x8] in { 1140defvar inst = !cast<NI>("AVGR_U_"#vec); 1141def : Pat<(wasm_shr_u 1142 (add_nuw 1143 (add_nuw (vec.vt V128:$lhs), (vec.vt V128:$rhs)), 1144 (vec.splat (i32 1))), 1145 (i32 1)), 1146 (inst $lhs, $rhs)>; 1147} 1148 1149// Widening dot product: i32x4.dot_i16x8_s 1150let isCommutable = 1 in 1151defm DOT : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins), 1152 [(set V128:$dst, (int_wasm_dot V128:$lhs, V128:$rhs))], 1153 "i32x4.dot_i16x8_s\t$dst, $lhs, $rhs", "i32x4.dot_i16x8_s", 1154 186>; 1155 1156// Extending multiplication: extmul_{low,high}_P, extmul_high 1157def extend_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>; 1158def extend_low_s : SDNode<"WebAssemblyISD::EXTEND_LOW_S", extend_t>; 1159def extend_high_s : SDNode<"WebAssemblyISD::EXTEND_HIGH_S", extend_t>; 1160def extend_low_u : SDNode<"WebAssemblyISD::EXTEND_LOW_U", extend_t>; 1161def extend_high_u : SDNode<"WebAssemblyISD::EXTEND_HIGH_U", extend_t>; 1162 1163multiclass SIMDExtBinary<Vec vec, SDPatternOperator node, string name, 1164 bits<32> simdop> { 1165 defm _#vec : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), 1166 (outs), (ins), 1167 [(set (vec.vt V128:$dst), (node 1168 (vec.split.vt V128:$lhs),(vec.split.vt V128:$rhs)))], 1169 vec.prefix#"."#name#"\t$dst, $lhs, $rhs", 1170 vec.prefix#"."#name, simdop>; 1171} 1172 1173class ExtMulPat<SDNode extend> : 1174 PatFrag<(ops node:$lhs, node:$rhs), 1175 (mul (extend $lhs), (extend $rhs))> {} 1176 1177def extmul_low_s : ExtMulPat<extend_low_s>; 1178def extmul_high_s : ExtMulPat<extend_high_s>; 1179def extmul_low_u : ExtMulPat<extend_low_u>; 1180def extmul_high_u : ExtMulPat<extend_high_u>; 1181 1182defm EXTMUL_LOW_S : 1183 SIMDExtBinary<I16x8, extmul_low_s, "extmul_low_i8x16_s", 0x9c>; 1184defm EXTMUL_HIGH_S : 1185 SIMDExtBinary<I16x8, extmul_high_s, "extmul_high_i8x16_s", 0x9d>; 1186defm EXTMUL_LOW_U : 1187 SIMDExtBinary<I16x8, extmul_low_u, "extmul_low_i8x16_u", 0x9e>; 1188defm EXTMUL_HIGH_U : 1189 SIMDExtBinary<I16x8, extmul_high_u, "extmul_high_i8x16_u", 0x9f>; 1190 1191defm EXTMUL_LOW_S : 1192 SIMDExtBinary<I32x4, extmul_low_s, "extmul_low_i16x8_s", 0xbc>; 1193defm EXTMUL_HIGH_S : 1194 SIMDExtBinary<I32x4, extmul_high_s, "extmul_high_i16x8_s", 0xbd>; 1195defm EXTMUL_LOW_U : 1196 SIMDExtBinary<I32x4, extmul_low_u, "extmul_low_i16x8_u", 0xbe>; 1197defm EXTMUL_HIGH_U : 1198 SIMDExtBinary<I32x4, extmul_high_u, "extmul_high_i16x8_u", 0xbf>; 1199 1200defm EXTMUL_LOW_S : 1201 SIMDExtBinary<I64x2, extmul_low_s, "extmul_low_i32x4_s", 0xdc>; 1202defm EXTMUL_HIGH_S : 1203 SIMDExtBinary<I64x2, extmul_high_s, "extmul_high_i32x4_s", 0xdd>; 1204defm EXTMUL_LOW_U : 1205 SIMDExtBinary<I64x2, extmul_low_u, "extmul_low_i32x4_u", 0xde>; 1206defm EXTMUL_HIGH_U : 1207 SIMDExtBinary<I64x2, extmul_high_u, "extmul_high_i32x4_u", 0xdf>; 1208 1209//===----------------------------------------------------------------------===// 1210// Floating-point unary arithmetic 1211//===----------------------------------------------------------------------===// 1212 1213multiclass SIMDUnaryFP<SDNode node, string name, bits<32> baseInst> { 1214 defm "" : SIMDUnary<F32x4, node, name, baseInst>; 1215 defm "" : SIMDUnary<F64x2, node, name, !add(baseInst, 12)>; 1216 // Unlike F32x4 and F64x2 there's not a gap in the opcodes between "neg" and 1217 // "sqrt" so subtract one from the offset. 1218 defm "" : HalfPrecisionUnary<F16x8, node, name, 1219 !add(baseInst,!if(!eq(name, "sqrt"), 79, 80))>; 1220} 1221 1222// Absolute value: abs 1223defm ABS : SIMDUnaryFP<fabs, "abs", 224>; 1224 1225// Negation: neg 1226defm NEG : SIMDUnaryFP<fneg, "neg", 225>; 1227 1228// Square root: sqrt 1229defm SQRT : SIMDUnaryFP<fsqrt, "sqrt", 227>; 1230 1231// Rounding: ceil, floor, trunc, nearest 1232defm CEIL : SIMDUnary<F32x4, fceil, "ceil", 0x67>; 1233defm FLOOR : SIMDUnary<F32x4, ffloor, "floor", 0x68>; 1234defm TRUNC: SIMDUnary<F32x4, ftrunc, "trunc", 0x69>; 1235defm NEAREST: SIMDUnary<F32x4, fnearbyint, "nearest", 0x6a>; 1236defm CEIL : SIMDUnary<F64x2, fceil, "ceil", 0x74>; 1237defm FLOOR : SIMDUnary<F64x2, ffloor, "floor", 0x75>; 1238defm TRUNC: SIMDUnary<F64x2, ftrunc, "trunc", 0x7a>; 1239defm NEAREST: SIMDUnary<F64x2, fnearbyint, "nearest", 0x94>; 1240defm CEIL : HalfPrecisionUnary<F16x8, fceil, "ceil", 0x133>; 1241defm FLOOR : HalfPrecisionUnary<F16x8, ffloor, "floor", 0x134>; 1242defm TRUNC : HalfPrecisionUnary<F16x8, ftrunc, "trunc", 0x135>; 1243defm NEAREST : HalfPrecisionUnary<F16x8, fnearbyint, "nearest", 0x136>; 1244 1245// WebAssembly doesn't expose inexact exceptions, so map frint to fnearbyint. 1246def : Pat<(v4f32 (frint (v4f32 V128:$src))), (NEAREST_F32x4 V128:$src)>; 1247def : Pat<(v2f64 (frint (v2f64 V128:$src))), (NEAREST_F64x2 V128:$src)>; 1248def : Pat<(v8f16 (frint (v8f16 V128:$src))), (NEAREST_F16x8 V128:$src)>; 1249 1250// WebAssembly always rounds ties-to-even, so map froundeven to fnearbyint. 1251def : Pat<(v4f32 (froundeven (v4f32 V128:$src))), (NEAREST_F32x4 V128:$src)>; 1252def : Pat<(v2f64 (froundeven (v2f64 V128:$src))), (NEAREST_F64x2 V128:$src)>; 1253def : Pat<(v8f16 (froundeven (v8f16 V128:$src))), (NEAREST_F16x8 V128:$src)>; 1254 1255//===----------------------------------------------------------------------===// 1256// Floating-point binary arithmetic 1257//===----------------------------------------------------------------------===// 1258 1259multiclass SIMDBinaryFP<SDPatternOperator node, string name, bits<32> baseInst> { 1260 defm "" : SIMDBinary<F32x4, node, name, baseInst>; 1261 defm "" : SIMDBinary<F64x2, node, name, !add(baseInst, 12)>; 1262 defm "" : HalfPrecisionBinary<F16x8, node, name, !add(baseInst, 89)>; 1263} 1264 1265// Addition: add 1266let isCommutable = 1 in 1267defm ADD : SIMDBinaryFP<fadd, "add", 228>; 1268 1269// Subtraction: sub 1270defm SUB : SIMDBinaryFP<fsub, "sub", 229>; 1271 1272// Multiplication: mul 1273let isCommutable = 1 in 1274defm MUL : SIMDBinaryFP<fmul, "mul", 230>; 1275 1276// Division: div 1277defm DIV : SIMDBinaryFP<fdiv, "div", 231>; 1278 1279// NaN-propagating minimum: min 1280defm MIN : SIMDBinaryFP<fminimum, "min", 232>; 1281 1282// NaN-propagating maximum: max 1283defm MAX : SIMDBinaryFP<fmaximum, "max", 233>; 1284 1285// Pseudo-minimum: pmin 1286def pmin : PatFrags<(ops node:$lhs, node:$rhs), [ 1287 (vselect (setolt $rhs, $lhs), $rhs, $lhs), 1288 (vselect (setole $rhs, $lhs), $rhs, $lhs), 1289 (vselect (setogt $lhs, $rhs), $rhs, $lhs), 1290 (vselect (setoge $lhs, $rhs), $rhs, $lhs), 1291 (vselect (setlt $lhs, $rhs), $lhs, $rhs), 1292 (vselect (setle $lhs, $rhs), $lhs, $rhs), 1293 (vselect (setgt $lhs, $rhs), $rhs, $lhs), 1294 (vselect (setge $lhs, $rhs), $rhs, $lhs) 1295]>; 1296defm PMIN : SIMDBinaryFP<pmin, "pmin", 234>; 1297 1298// Pseudo-maximum: pmax 1299def pmax : PatFrags<(ops node:$lhs, node:$rhs), [ 1300 (vselect (setogt $rhs, $lhs), $rhs, $lhs), 1301 (vselect (setoge $rhs, $lhs), $rhs, $lhs), 1302 (vselect (setolt $lhs, $rhs), $rhs, $lhs), 1303 (vselect (setole $lhs, $rhs), $rhs, $lhs), 1304 (vselect (setgt $lhs, $rhs), $lhs, $rhs), 1305 (vselect (setge $lhs, $rhs), $lhs, $rhs), 1306 (vselect (setlt $lhs, $rhs), $rhs, $lhs), 1307 (vselect (setle $lhs, $rhs), $rhs, $lhs) 1308]>; 1309defm PMAX : SIMDBinaryFP<pmax, "pmax", 235>; 1310 1311// Also match the pmin/pmax cases where the operands are int vectors (but the 1312// comparison is still a floating point comparison). This can happen when using 1313// the wasm_simd128.h intrinsics because v128_t is an integer vector. 1314foreach vec = [F32x4, F64x2, F16x8] in { 1315defvar pmin = !cast<NI>("PMIN_"#vec); 1316defvar pmax = !cast<NI>("PMAX_"#vec); 1317def : Pat<(vec.int_vt (vselect 1318 (setolt (vec.vt (bitconvert V128:$rhs)), 1319 (vec.vt (bitconvert V128:$lhs))), 1320 V128:$rhs, V128:$lhs)), 1321 (pmin $lhs, $rhs)>; 1322def : Pat<(vec.int_vt (vselect 1323 (setolt (vec.vt (bitconvert V128:$lhs)), 1324 (vec.vt (bitconvert V128:$rhs))), 1325 V128:$rhs, V128:$lhs)), 1326 (pmax $lhs, $rhs)>; 1327} 1328 1329// And match the pmin/pmax LLVM intrinsics as well 1330def : Pat<(v4f32 (int_wasm_pmin (v4f32 V128:$lhs), (v4f32 V128:$rhs))), 1331 (PMIN_F32x4 V128:$lhs, V128:$rhs)>; 1332def : Pat<(v4f32 (int_wasm_pmax (v4f32 V128:$lhs), (v4f32 V128:$rhs))), 1333 (PMAX_F32x4 V128:$lhs, V128:$rhs)>; 1334def : Pat<(v2f64 (int_wasm_pmin (v2f64 V128:$lhs), (v2f64 V128:$rhs))), 1335 (PMIN_F64x2 V128:$lhs, V128:$rhs)>; 1336def : Pat<(v2f64 (int_wasm_pmax (v2f64 V128:$lhs), (v2f64 V128:$rhs))), 1337 (PMAX_F64x2 V128:$lhs, V128:$rhs)>; 1338def : Pat<(v8f16 (int_wasm_pmin (v8f16 V128:$lhs), (v8f16 V128:$rhs))), 1339 (PMIN_F16x8 V128:$lhs, V128:$rhs)>; 1340def : Pat<(v8f16 (int_wasm_pmax (v8f16 V128:$lhs), (v8f16 V128:$rhs))), 1341 (PMAX_F16x8 V128:$lhs, V128:$rhs)>; 1342 1343//===----------------------------------------------------------------------===// 1344// Conversions 1345//===----------------------------------------------------------------------===// 1346 1347multiclass SIMDConvert<Vec vec, Vec arg, SDPatternOperator op, string name, 1348 bits<32> simdop, list<Predicate> reqs = []> { 1349 defm op#_#vec : 1350 SIMD_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins), 1351 [(set (vec.vt V128:$dst), (vec.vt (op (arg.vt V128:$vec))))], 1352 vec.prefix#"."#name#"\t$dst, $vec", vec.prefix#"."#name, simdop, reqs>; 1353} 1354 1355multiclass HalfPrecisionConvert<Vec vec, Vec arg, SDPatternOperator op, 1356 string name, bits<32> simdop> { 1357 defm "" : SIMDConvert<vec, arg, op, name, simdop, [HasFP16]>; 1358} 1359 1360// Floating point to integer with saturation: trunc_sat 1361defm "" : SIMDConvert<I32x4, F32x4, fp_to_sint, "trunc_sat_f32x4_s", 248>; 1362defm "" : SIMDConvert<I32x4, F32x4, fp_to_uint, "trunc_sat_f32x4_u", 249>; 1363defm "" : HalfPrecisionConvert<I16x8, F16x8, fp_to_sint, "trunc_sat_f16x8_s", 0x145>; 1364defm "" : HalfPrecisionConvert<I16x8, F16x8, fp_to_uint, "trunc_sat_f16x8_u", 0x146>; 1365 1366// Support the saturating variety as well. 1367def trunc_s_sat32 : PatFrag<(ops node:$x), (fp_to_sint_sat $x, i32)>; 1368def trunc_u_sat32 : PatFrag<(ops node:$x), (fp_to_uint_sat $x, i32)>; 1369def : Pat<(v4i32 (trunc_s_sat32 (v4f32 V128:$src))), (fp_to_sint_I32x4 $src)>; 1370def : Pat<(v4i32 (trunc_u_sat32 (v4f32 V128:$src))), (fp_to_uint_I32x4 $src)>; 1371 1372def trunc_s_sat16 : PatFrag<(ops node:$x), (fp_to_sint_sat $x, i16)>; 1373def trunc_u_sat16 : PatFrag<(ops node:$x), (fp_to_uint_sat $x, i16)>; 1374def : Pat<(v8i16 (trunc_s_sat16 (v8f16 V128:$src))), (fp_to_sint_I16x8 $src)>; 1375def : Pat<(v8i16 (trunc_u_sat16 (v8f16 V128:$src))), (fp_to_uint_I16x8 $src)>; 1376 1377def trunc_sat_zero_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>; 1378def trunc_sat_zero_s : 1379 SDNode<"WebAssemblyISD::TRUNC_SAT_ZERO_S", trunc_sat_zero_t>; 1380def trunc_sat_zero_u : 1381 SDNode<"WebAssemblyISD::TRUNC_SAT_ZERO_U", trunc_sat_zero_t>; 1382defm "" : SIMDConvert<I32x4, F64x2, trunc_sat_zero_s, "trunc_sat_f64x2_s_zero", 1383 0xfc>; 1384defm "" : SIMDConvert<I32x4, F64x2, trunc_sat_zero_u, "trunc_sat_f64x2_u_zero", 1385 0xfd>; 1386 1387// Integer to floating point: convert 1388def convert_low_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>; 1389def convert_low_s : SDNode<"WebAssemblyISD::CONVERT_LOW_S", convert_low_t>; 1390def convert_low_u : SDNode<"WebAssemblyISD::CONVERT_LOW_U", convert_low_t>; 1391defm "" : SIMDConvert<F32x4, I32x4, sint_to_fp, "convert_i32x4_s", 250>; 1392defm "" : SIMDConvert<F32x4, I32x4, uint_to_fp, "convert_i32x4_u", 251>; 1393defm "" : SIMDConvert<F64x2, I32x4, convert_low_s, "convert_low_i32x4_s", 0xfe>; 1394defm "" : SIMDConvert<F64x2, I32x4, convert_low_u, "convert_low_i32x4_u", 0xff>; 1395defm "" : HalfPrecisionConvert<F16x8, I16x8, sint_to_fp, "convert_i16x8_s", 0x147>; 1396defm "" : HalfPrecisionConvert<F16x8, I16x8, uint_to_fp, "convert_i16x8_u", 0x148>; 1397 1398// Extending operations 1399// TODO: refactor this to be uniform for i64x2 if the numbering is not changed. 1400multiclass SIMDExtend<Vec vec, bits<32> baseInst> { 1401 defm "" : SIMDConvert<vec, vec.split, extend_low_s, 1402 "extend_low_"#vec.split.prefix#"_s", baseInst>; 1403 defm "" : SIMDConvert<vec, vec.split, extend_high_s, 1404 "extend_high_"#vec.split.prefix#"_s", !add(baseInst, 1)>; 1405 defm "" : SIMDConvert<vec, vec.split, extend_low_u, 1406 "extend_low_"#vec.split.prefix#"_u", !add(baseInst, 2)>; 1407 defm "" : SIMDConvert<vec, vec.split, extend_high_u, 1408 "extend_high_"#vec.split.prefix#"_u", !add(baseInst, 3)>; 1409} 1410 1411defm "" : SIMDExtend<I16x8, 0x87>; 1412defm "" : SIMDExtend<I32x4, 0xa7>; 1413defm "" : SIMDExtend<I64x2, 0xc7>; 1414 1415// Narrowing operations 1416multiclass SIMDNarrow<Vec vec, bits<32> baseInst> { 1417 defvar name = vec.split.prefix#".narrow_"#vec.prefix; 1418 defm NARROW_S_#vec.split : 1419 SIMD_I<(outs V128:$dst), (ins V128:$low, V128:$high), (outs), (ins), 1420 [(set (vec.split.vt V128:$dst), (vec.split.vt (int_wasm_narrow_signed 1421 (vec.vt V128:$low), (vec.vt V128:$high))))], 1422 name#"_s\t$dst, $low, $high", name#"_s", baseInst>; 1423 defm NARROW_U_#vec.split : 1424 SIMD_I<(outs V128:$dst), (ins V128:$low, V128:$high), (outs), (ins), 1425 [(set (vec.split.vt V128:$dst), (vec.split.vt (int_wasm_narrow_unsigned 1426 (vec.vt V128:$low), (vec.vt V128:$high))))], 1427 name#"_u\t$dst, $low, $high", name#"_u", !add(baseInst, 1)>; 1428} 1429 1430defm "" : SIMDNarrow<I16x8, 101>; 1431defm "" : SIMDNarrow<I32x4, 133>; 1432 1433// WebAssemblyISD::NARROW_U 1434def wasm_narrow_t : SDTypeProfile<1, 2, []>; 1435def wasm_narrow_u : SDNode<"WebAssemblyISD::NARROW_U", wasm_narrow_t>; 1436def : Pat<(v16i8 (wasm_narrow_u (v8i16 V128:$left), (v8i16 V128:$right))), 1437 (NARROW_U_I8x16 $left, $right)>; 1438def : Pat<(v8i16 (wasm_narrow_u (v4i32 V128:$left), (v4i32 V128:$right))), 1439 (NARROW_U_I16x8 $left, $right)>; 1440 1441// Bitcasts are nops 1442// Matching bitcast t1 to t1 causes strange errors, so avoid repeating types 1443foreach t1 = AllVecs in 1444foreach t2 = AllVecs in 1445if !ne(t1, t2) then 1446def : Pat<(t1.vt (bitconvert (t2.vt V128:$v))), (t1.vt V128:$v)>; 1447 1448// Extended pairwise addition 1449defm "" : SIMDConvert<I16x8, I8x16, int_wasm_extadd_pairwise_signed, 1450 "extadd_pairwise_i8x16_s", 0x7c>; 1451defm "" : SIMDConvert<I16x8, I8x16, int_wasm_extadd_pairwise_unsigned, 1452 "extadd_pairwise_i8x16_u", 0x7d>; 1453defm "" : SIMDConvert<I32x4, I16x8, int_wasm_extadd_pairwise_signed, 1454 "extadd_pairwise_i16x8_s", 0x7e>; 1455defm "" : SIMDConvert<I32x4, I16x8, int_wasm_extadd_pairwise_unsigned, 1456 "extadd_pairwise_i16x8_u", 0x7f>; 1457 1458// f64x2 <-> f32x4 conversions 1459def demote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>; 1460def demote_zero : SDNode<"WebAssemblyISD::DEMOTE_ZERO", demote_t>; 1461defm "" : SIMDConvert<F32x4, F64x2, demote_zero, 1462 "demote_f64x2_zero", 0x5e>; 1463 1464def promote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>; 1465def promote_low : SDNode<"WebAssemblyISD::PROMOTE_LOW", promote_t>; 1466defm "" : SIMDConvert<F64x2, F32x4, promote_low, "promote_low_f32x4", 0x5f>; 1467 1468// Lower extending loads to load64_zero + promote_low 1469def extloadv2f32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { 1470 let MemoryVT = v2f32; 1471} 1472// Adapted from the body of LoadPatNoOffset 1473// TODO: other addressing patterns 1474def : Pat<(v2f64 (extloadv2f32 (i32 I32:$addr))), 1475 (promote_low_F64x2 (LOAD_ZERO_64_A32 0, 0, I32:$addr))>, 1476 Requires<[HasAddr32]>; 1477def : Pat<(v2f64 (extloadv2f32 (i64 I64:$addr))), 1478 (promote_low_F64x2 (LOAD_ZERO_64_A64 0, 0, I64:$addr))>, 1479 Requires<[HasAddr64]>; 1480 1481//===----------------------------------------------------------------------===// 1482// Saturating Rounding Q-Format Multiplication 1483//===----------------------------------------------------------------------===// 1484 1485defm Q15MULR_SAT_S : 1486 SIMDBinary<I16x8, int_wasm_q15mulr_sat_signed, "q15mulr_sat_s", 0x82>; 1487 1488//===----------------------------------------------------------------------===// 1489// Relaxed swizzle 1490//===----------------------------------------------------------------------===// 1491 1492defm RELAXED_SWIZZLE : 1493 RELAXED_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins), 1494 [(set (v16i8 V128:$dst), 1495 (int_wasm_relaxed_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)))], 1496 "i8x16.relaxed_swizzle\t$dst, $src, $mask", "i8x16.relaxed_swizzle", 0x100>; 1497 1498//===----------------------------------------------------------------------===// 1499// Relaxed floating-point to int conversions 1500//===----------------------------------------------------------------------===// 1501 1502multiclass RelaxedConvert<Vec vec, Vec arg, SDPatternOperator op, string name, bits<32> simdop> { 1503 defm op#_#vec : 1504 RELAXED_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins), 1505 [(set (vec.vt V128:$dst), (vec.vt (op (arg.vt V128:$vec))))], 1506 vec.prefix#"."#name#"\t$dst, $vec", vec.prefix#"."#name, simdop>; 1507} 1508 1509defm "" : RelaxedConvert<I32x4, F32x4, int_wasm_relaxed_trunc_signed, 1510 "relaxed_trunc_f32x4_s", 0x101>; 1511defm "" : RelaxedConvert<I32x4, F32x4, int_wasm_relaxed_trunc_unsigned, 1512 "relaxed_trunc_f32x4_u", 0x102>; 1513defm "" : RelaxedConvert<I32x4, F64x2, int_wasm_relaxed_trunc_signed_zero, 1514 "relaxed_trunc_f64x2_s_zero", 0x103>; 1515defm "" : RelaxedConvert<I32x4, F64x2, int_wasm_relaxed_trunc_unsigned_zero, 1516 "relaxed_trunc_f64x2_u_zero", 0x104>; 1517 1518//===----------------------------------------------------------------------===// 1519// Relaxed (Negative) Multiply-Add (madd/nmadd) 1520//===----------------------------------------------------------------------===// 1521 1522multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, list<Predicate> reqs> { 1523 defm MADD_#vec : 1524 SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins), 1525 [(set (vec.vt V128:$dst), (int_wasm_relaxed_madd 1526 (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))], 1527 vec.prefix#".relaxed_madd\t$dst, $a, $b, $c", 1528 vec.prefix#".relaxed_madd", simdopA, reqs>; 1529 defm NMADD_#vec : 1530 SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins), 1531 [(set (vec.vt V128:$dst), (int_wasm_relaxed_nmadd 1532 (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))], 1533 vec.prefix#".relaxed_nmadd\t$dst, $a, $b, $c", 1534 vec.prefix#".relaxed_nmadd", simdopS, reqs>; 1535} 1536 1537defm "" : SIMDMADD<F32x4, 0x105, 0x106, [HasRelaxedSIMD]>; 1538defm "" : SIMDMADD<F64x2, 0x107, 0x108, [HasRelaxedSIMD]>; 1539defm "" : SIMDMADD<F16x8, 0x14e, 0x14f, [HasFP16]>; 1540 1541//===----------------------------------------------------------------------===// 1542// Laneselect 1543//===----------------------------------------------------------------------===// 1544 1545multiclass SIMDLANESELECT<Vec vec, bits<32> op> { 1546 defm LANESELECT_#vec : 1547 RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins), 1548 [(set (vec.vt V128:$dst), (int_wasm_relaxed_laneselect 1549 (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))], 1550 vec.prefix#".relaxed_laneselect\t$dst, $a, $b, $c", 1551 vec.prefix#".relaxed_laneselect", op>; 1552} 1553 1554defm "" : SIMDLANESELECT<I8x16, 0x109>; 1555defm "" : SIMDLANESELECT<I16x8, 0x10a>; 1556defm "" : SIMDLANESELECT<I32x4, 0x10b>; 1557defm "" : SIMDLANESELECT<I64x2, 0x10c>; 1558 1559//===----------------------------------------------------------------------===// 1560// Relaxed floating-point min and max. 1561//===----------------------------------------------------------------------===// 1562 1563multiclass RelaxedBinary<Vec vec, SDPatternOperator node, string name, 1564 bits<32> simdop> { 1565 defm _#vec : RELAXED_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), 1566 (outs), (ins), 1567 [(set (vec.vt V128:$dst), 1568 (node (vec.vt V128:$lhs), (vec.vt V128:$rhs)))], 1569 vec.prefix#"."#name#"\t$dst, $lhs, $rhs", 1570 vec.prefix#"."#name, simdop>; 1571} 1572 1573defm SIMD_RELAXED_FMIN : 1574 RelaxedBinary<F32x4, int_wasm_relaxed_min, "relaxed_min", 0x10d>; 1575defm SIMD_RELAXED_FMAX : 1576 RelaxedBinary<F32x4, int_wasm_relaxed_max, "relaxed_max", 0x10e>; 1577defm SIMD_RELAXED_FMIN : 1578 RelaxedBinary<F64x2, int_wasm_relaxed_min, "relaxed_min", 0x10f>; 1579defm SIMD_RELAXED_FMAX : 1580 RelaxedBinary<F64x2, int_wasm_relaxed_max, "relaxed_max", 0x110>; 1581 1582//===----------------------------------------------------------------------===// 1583// Relaxed rounding q15 multiplication 1584//===----------------------------------------------------------------------===// 1585 1586defm RELAXED_Q15MULR_S : 1587 RelaxedBinary<I16x8, int_wasm_relaxed_q15mulr_signed, "relaxed_q15mulr_s", 1588 0x111>; 1589 1590//===----------------------------------------------------------------------===// 1591// Relaxed integer dot product 1592//===----------------------------------------------------------------------===// 1593 1594defm RELAXED_DOT : 1595 RELAXED_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins), 1596 [(set (v8i16 V128:$dst), (int_wasm_relaxed_dot_i8x16_i7x16_signed 1597 (v16i8 V128:$lhs), (v16i8 V128:$rhs)))], 1598 "i16x8.relaxed_dot_i8x16_i7x16_s\t$dst, $lhs, $rhs", 1599 "i16x8.relaxed_dot_i8x16_i7x16_s", 0x112>; 1600 1601defm RELAXED_DOT_ADD : 1602 RELAXED_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs, V128:$acc), 1603 (outs), (ins), 1604 [(set (v4i32 V128:$dst), (int_wasm_relaxed_dot_i8x16_i7x16_add_signed 1605 (v16i8 V128:$lhs), (v16i8 V128:$rhs), (v4i32 V128:$acc)))], 1606 "i32x4.relaxed_dot_i8x16_i7x16_add_s\t$dst, $lhs, $rhs, $acc", 1607 "i32x4.relaxed_dot_i8x16_i7x16_add_s", 0x113>; 1608 1609//===----------------------------------------------------------------------===// 1610// Relaxed BFloat16 dot product 1611//===----------------------------------------------------------------------===// 1612 1613defm RELAXED_DOT_BFLOAT : 1614 RELAXED_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs, V128:$acc), 1615 (outs), (ins), 1616 [(set (v4f32 V128:$dst), (int_wasm_relaxed_dot_bf16x8_add_f32 1617 (v8i16 V128:$lhs), (v8i16 V128:$rhs), (v4f32 V128:$acc)))], 1618 "f32x4.relaxed_dot_bf16x8_add_f32\t$dst, $lhs, $rhs, $acc", 1619 "f32x4.relaxed_dot_bf16x8_add_f32", 0x114>; 1620