xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/X86/X86InstrFoldTables.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===-- X86InstrFoldTables.cpp - X86 Instruction Folding Tables -----------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file contains the X86 memory folding tables.
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #include "X86InstrFoldTables.h"
140b57cec5SDimitry Andric #include "X86InstrInfo.h"
150b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h"
16753f127fSDimitry Andric #include <atomic>
170b57cec5SDimitry Andric #include <vector>
180b57cec5SDimitry Andric 
190b57cec5SDimitry Andric using namespace llvm;
200b57cec5SDimitry Andric 
210b57cec5SDimitry Andric // These tables are sorted by their RegOp value allowing them to be binary
220b57cec5SDimitry Andric // searched at runtime without the need for additional storage. The enum values
230b57cec5SDimitry Andric // are currently emitted in X86GenInstrInfo.inc in alphabetical order, which
240b57cec5SDimitry Andric // makes sorting these tables a simple matter of alphabetizing the table.
2506c3fb27SDimitry Andric #include "X86GenFoldTables.inc"
268bcb0991SDimitry Andric 
// Width-changing broadcast table for operand index 2 (it is consumed together
// with lookupFoldTable(RegOp, 2) when the broadcast fold table is built).
//
// Column 1 is the register-register opcode, column 2 is the broadcast-memory
// ("rmb") opcode of the same bitwise operation at the *other* element width
// (PD <-> PS, D <-> Q), and column 3 is the TB_BCAST_* flag describing the
// element type broadcast by the column-2 opcode. Every entry is an
// AND/ANDN/OR/XOR variant.
//
// The rows must stay sorted by the first column: lookupFoldTableImpl asserts
// the ordering in asserts-enabled builds.
2706c3fb27SDimitry Andric // Table to map instructions safe to broadcast using a different width from the
2806c3fb27SDimitry Andric // element width.
295f757f3fSDimitry Andric static const X86FoldTableEntry BroadcastSizeTable2[] = {
3006c3fb27SDimitry Andric   { X86::VANDNPDZ128rr,        X86::VANDNPSZ128rmb,       TB_BCAST_SS },
3106c3fb27SDimitry Andric   { X86::VANDNPDZ256rr,        X86::VANDNPSZ256rmb,       TB_BCAST_SS },
3206c3fb27SDimitry Andric   { X86::VANDNPDZrr,           X86::VANDNPSZrmb,          TB_BCAST_SS },
3306c3fb27SDimitry Andric   { X86::VANDNPSZ128rr,        X86::VANDNPDZ128rmb,       TB_BCAST_SD },
3406c3fb27SDimitry Andric   { X86::VANDNPSZ256rr,        X86::VANDNPDZ256rmb,       TB_BCAST_SD },
3506c3fb27SDimitry Andric   { X86::VANDNPSZrr,           X86::VANDNPDZrmb,          TB_BCAST_SD },
3606c3fb27SDimitry Andric   { X86::VANDPDZ128rr,         X86::VANDPSZ128rmb,        TB_BCAST_SS },
3706c3fb27SDimitry Andric   { X86::VANDPDZ256rr,         X86::VANDPSZ256rmb,        TB_BCAST_SS },
3806c3fb27SDimitry Andric   { X86::VANDPDZrr,            X86::VANDPSZrmb,           TB_BCAST_SS },
3906c3fb27SDimitry Andric   { X86::VANDPSZ128rr,         X86::VANDPDZ128rmb,        TB_BCAST_SD },
4006c3fb27SDimitry Andric   { X86::VANDPSZ256rr,         X86::VANDPDZ256rmb,        TB_BCAST_SD },
4106c3fb27SDimitry Andric   { X86::VANDPSZrr,            X86::VANDPDZrmb,           TB_BCAST_SD },
4206c3fb27SDimitry Andric   { X86::VORPDZ128rr,          X86::VORPSZ128rmb,         TB_BCAST_SS },
4306c3fb27SDimitry Andric   { X86::VORPDZ256rr,          X86::VORPSZ256rmb,         TB_BCAST_SS },
4406c3fb27SDimitry Andric   { X86::VORPDZrr,             X86::VORPSZrmb,            TB_BCAST_SS },
4506c3fb27SDimitry Andric   { X86::VORPSZ128rr,          X86::VORPDZ128rmb,         TB_BCAST_SD },
4606c3fb27SDimitry Andric   { X86::VORPSZ256rr,          X86::VORPDZ256rmb,         TB_BCAST_SD },
4706c3fb27SDimitry Andric   { X86::VORPSZrr,             X86::VORPDZrmb,            TB_BCAST_SD },
4806c3fb27SDimitry Andric   { X86::VPANDDZ128rr,         X86::VPANDQZ128rmb,        TB_BCAST_Q },
4906c3fb27SDimitry Andric   { X86::VPANDDZ256rr,         X86::VPANDQZ256rmb,        TB_BCAST_Q },
5006c3fb27SDimitry Andric   { X86::VPANDDZrr,            X86::VPANDQZrmb,           TB_BCAST_Q },
5106c3fb27SDimitry Andric   { X86::VPANDNDZ128rr,        X86::VPANDNQZ128rmb,       TB_BCAST_Q },
5206c3fb27SDimitry Andric   { X86::VPANDNDZ256rr,        X86::VPANDNQZ256rmb,       TB_BCAST_Q },
5306c3fb27SDimitry Andric   { X86::VPANDNDZrr,           X86::VPANDNQZrmb,          TB_BCAST_Q },
5406c3fb27SDimitry Andric   { X86::VPANDNQZ128rr,        X86::VPANDNDZ128rmb,       TB_BCAST_D },
5506c3fb27SDimitry Andric   { X86::VPANDNQZ256rr,        X86::VPANDNDZ256rmb,       TB_BCAST_D },
5606c3fb27SDimitry Andric   { X86::VPANDNQZrr,           X86::VPANDNDZrmb,          TB_BCAST_D },
5706c3fb27SDimitry Andric   { X86::VPANDQZ128rr,         X86::VPANDDZ128rmb,        TB_BCAST_D },
5806c3fb27SDimitry Andric   { X86::VPANDQZ256rr,         X86::VPANDDZ256rmb,        TB_BCAST_D },
5906c3fb27SDimitry Andric   { X86::VPANDQZrr,            X86::VPANDDZrmb,           TB_BCAST_D },
6006c3fb27SDimitry Andric   { X86::VPORDZ128rr,          X86::VPORQZ128rmb,         TB_BCAST_Q },
6106c3fb27SDimitry Andric   { X86::VPORDZ256rr,          X86::VPORQZ256rmb,         TB_BCAST_Q },
6206c3fb27SDimitry Andric   { X86::VPORDZrr,             X86::VPORQZrmb,            TB_BCAST_Q },
6306c3fb27SDimitry Andric   { X86::VPORQZ128rr,          X86::VPORDZ128rmb,         TB_BCAST_D },
6406c3fb27SDimitry Andric   { X86::VPORQZ256rr,          X86::VPORDZ256rmb,         TB_BCAST_D },
6506c3fb27SDimitry Andric   { X86::VPORQZrr,             X86::VPORDZrmb,            TB_BCAST_D },
6606c3fb27SDimitry Andric   { X86::VPXORDZ128rr,         X86::VPXORQZ128rmb,        TB_BCAST_Q },
6706c3fb27SDimitry Andric   { X86::VPXORDZ256rr,         X86::VPXORQZ256rmb,        TB_BCAST_Q },
6806c3fb27SDimitry Andric   { X86::VPXORDZrr,            X86::VPXORQZrmb,           TB_BCAST_Q },
6906c3fb27SDimitry Andric   { X86::VPXORQZ128rr,         X86::VPXORDZ128rmb,        TB_BCAST_D },
7006c3fb27SDimitry Andric   { X86::VPXORQZ256rr,         X86::VPXORDZ256rmb,        TB_BCAST_D },
7106c3fb27SDimitry Andric   { X86::VPXORQZrr,            X86::VPXORDZrmb,           TB_BCAST_D },
7206c3fb27SDimitry Andric   { X86::VXORPDZ128rr,         X86::VXORPSZ128rmb,        TB_BCAST_SS },
7306c3fb27SDimitry Andric   { X86::VXORPDZ256rr,         X86::VXORPSZ256rmb,        TB_BCAST_SS },
7406c3fb27SDimitry Andric   { X86::VXORPDZrr,            X86::VXORPSZrmb,           TB_BCAST_SS },
7506c3fb27SDimitry Andric   { X86::VXORPSZ128rr,         X86::VXORPDZ128rmb,        TB_BCAST_SD },
7606c3fb27SDimitry Andric   { X86::VXORPSZ256rr,         X86::VXORPDZ256rmb,        TB_BCAST_SD },
7706c3fb27SDimitry Andric   { X86::VXORPSZrr,            X86::VXORPDZrmb,           TB_BCAST_SD },
7806c3fb27SDimitry Andric };
7906c3fb27SDimitry Andric 
// Width-changing broadcast table for operand index 3 (it is consumed together
// with lookupFoldTable(RegOp, 3) when the broadcast fold table is built).
//
// Each row maps a VPTERNLOG register form (column 1) to the broadcast-memory
// form of the opposite element width, D <-> Q (column 2); column 3 is the
// TB_BCAST_* flag describing the element type broadcast by the column-2 opcode.
//
// The rows must stay sorted by the first column: lookupFoldTableImpl asserts
// the ordering in asserts-enabled builds.
805f757f3fSDimitry Andric static const X86FoldTableEntry BroadcastSizeTable3[] = {
8106c3fb27SDimitry Andric   { X86::VPTERNLOGDZ128rri,    X86::VPTERNLOGQZ128rmbi,   TB_BCAST_Q },
8206c3fb27SDimitry Andric   { X86::VPTERNLOGDZ256rri,    X86::VPTERNLOGQZ256rmbi,   TB_BCAST_Q },
8306c3fb27SDimitry Andric   { X86::VPTERNLOGDZrri,       X86::VPTERNLOGQZrmbi,      TB_BCAST_Q },
8406c3fb27SDimitry Andric   { X86::VPTERNLOGQZ128rri,    X86::VPTERNLOGDZ128rmbi,   TB_BCAST_D },
8506c3fb27SDimitry Andric   { X86::VPTERNLOGQZ256rri,    X86::VPTERNLOGDZ256rmbi,   TB_BCAST_D },
8606c3fb27SDimitry Andric   { X86::VPTERNLOGQZrri,       X86::VPTERNLOGDZrmbi,      TB_BCAST_D },
8706c3fb27SDimitry Andric };
8806c3fb27SDimitry Andric 
895f757f3fSDimitry Andric static const X86FoldTableEntry *
905f757f3fSDimitry Andric lookupFoldTableImpl(ArrayRef<X86FoldTableEntry> Table, unsigned RegOp) {
910b57cec5SDimitry Andric #ifndef NDEBUG
925f757f3fSDimitry Andric #define CHECK_SORTED_UNIQUE(TABLE)                                             \
935f757f3fSDimitry Andric   assert(llvm::is_sorted(TABLE) && #TABLE " is not sorted");                   \
945f757f3fSDimitry Andric   assert(std::adjacent_find(std::begin(Table), std::end(Table)) ==             \
955f757f3fSDimitry Andric              std::end(Table) &&                                                \
965f757f3fSDimitry Andric          #TABLE " is not unique");
975f757f3fSDimitry Andric 
980b57cec5SDimitry Andric   // Make sure the tables are sorted.
990b57cec5SDimitry Andric   static std::atomic<bool> FoldTablesChecked(false);
1000b57cec5SDimitry Andric   if (!FoldTablesChecked.load(std::memory_order_relaxed)) {
1015f757f3fSDimitry Andric     CHECK_SORTED_UNIQUE(Table2Addr)
1025f757f3fSDimitry Andric     CHECK_SORTED_UNIQUE(Table0)
1035f757f3fSDimitry Andric     CHECK_SORTED_UNIQUE(Table1)
1045f757f3fSDimitry Andric     CHECK_SORTED_UNIQUE(Table2)
1055f757f3fSDimitry Andric     CHECK_SORTED_UNIQUE(Table3)
1065f757f3fSDimitry Andric     CHECK_SORTED_UNIQUE(Table4)
1075f757f3fSDimitry Andric     CHECK_SORTED_UNIQUE(BroadcastTable1)
1085f757f3fSDimitry Andric     CHECK_SORTED_UNIQUE(BroadcastTable2)
1095f757f3fSDimitry Andric     CHECK_SORTED_UNIQUE(BroadcastTable3)
1105f757f3fSDimitry Andric     CHECK_SORTED_UNIQUE(BroadcastTable4)
1115f757f3fSDimitry Andric     CHECK_SORTED_UNIQUE(BroadcastSizeTable2)
1125f757f3fSDimitry Andric     CHECK_SORTED_UNIQUE(BroadcastSizeTable3)
1130b57cec5SDimitry Andric     FoldTablesChecked.store(true, std::memory_order_relaxed);
1140b57cec5SDimitry Andric   }
1150b57cec5SDimitry Andric #endif
1160b57cec5SDimitry Andric 
1175f757f3fSDimitry Andric   const X86FoldTableEntry *Data = llvm::lower_bound(Table, RegOp);
1180b57cec5SDimitry Andric   if (Data != Table.end() && Data->KeyOp == RegOp &&
1190b57cec5SDimitry Andric       !(Data->Flags & TB_NO_FORWARD))
1200b57cec5SDimitry Andric     return Data;
1210b57cec5SDimitry Andric   return nullptr;
1220b57cec5SDimitry Andric }
1230b57cec5SDimitry Andric 
124*0fca6ea1SDimitry Andric const X86FoldTableEntry *llvm::lookupTwoAddrFoldTable(unsigned RegOp) {
1255f757f3fSDimitry Andric   return lookupFoldTableImpl(Table2Addr, RegOp);
1260b57cec5SDimitry Andric }
1270b57cec5SDimitry Andric 
128*0fca6ea1SDimitry Andric const X86FoldTableEntry *llvm::lookupFoldTable(unsigned RegOp, unsigned OpNum) {
1295f757f3fSDimitry Andric   ArrayRef<X86FoldTableEntry> FoldTable;
1300b57cec5SDimitry Andric   if (OpNum == 0)
1315f757f3fSDimitry Andric     FoldTable = ArrayRef(Table0);
1320b57cec5SDimitry Andric   else if (OpNum == 1)
1335f757f3fSDimitry Andric     FoldTable = ArrayRef(Table1);
1340b57cec5SDimitry Andric   else if (OpNum == 2)
1355f757f3fSDimitry Andric     FoldTable = ArrayRef(Table2);
1360b57cec5SDimitry Andric   else if (OpNum == 3)
1375f757f3fSDimitry Andric     FoldTable = ArrayRef(Table3);
1380b57cec5SDimitry Andric   else if (OpNum == 4)
1395f757f3fSDimitry Andric     FoldTable = ArrayRef(Table4);
1400b57cec5SDimitry Andric   else
1410b57cec5SDimitry Andric     return nullptr;
1420b57cec5SDimitry Andric 
1430b57cec5SDimitry Andric   return lookupFoldTableImpl(FoldTable, RegOp);
1440b57cec5SDimitry Andric }
1450b57cec5SDimitry Andric 
146*0fca6ea1SDimitry Andric const X86FoldTableEntry *llvm::lookupBroadcastFoldTable(unsigned RegOp,
147*0fca6ea1SDimitry Andric                                                         unsigned OpNum) {
148*0fca6ea1SDimitry Andric   ArrayRef<X86FoldTableEntry> FoldTable;
149*0fca6ea1SDimitry Andric   if (OpNum == 1)
150*0fca6ea1SDimitry Andric     FoldTable = ArrayRef(BroadcastTable1);
151*0fca6ea1SDimitry Andric   else if (OpNum == 2)
152*0fca6ea1SDimitry Andric     FoldTable = ArrayRef(BroadcastTable2);
153*0fca6ea1SDimitry Andric   else if (OpNum == 3)
154*0fca6ea1SDimitry Andric     FoldTable = ArrayRef(BroadcastTable3);
155*0fca6ea1SDimitry Andric   else if (OpNum == 4)
156*0fca6ea1SDimitry Andric     FoldTable = ArrayRef(BroadcastTable4);
157*0fca6ea1SDimitry Andric   else
158*0fca6ea1SDimitry Andric     return nullptr;
159*0fca6ea1SDimitry Andric 
160*0fca6ea1SDimitry Andric   return lookupFoldTableImpl(FoldTable, RegOp);
161*0fca6ea1SDimitry Andric }
162*0fca6ea1SDimitry Andric 
1630b57cec5SDimitry Andric namespace {
1640b57cec5SDimitry Andric 
1650b57cec5SDimitry Andric // This class stores the memory unfolding tables. It is instantiated as a
166753f127fSDimitry Andric // function scope static variable to lazily init the unfolding table.
1670b57cec5SDimitry Andric struct X86MemUnfoldTable {
1680b57cec5SDimitry Andric   // Stores memory unfolding tables entries sorted by opcode.
1695f757f3fSDimitry Andric   std::vector<X86FoldTableEntry> Table;
1700b57cec5SDimitry Andric 
1710b57cec5SDimitry Andric   X86MemUnfoldTable() {
1725f757f3fSDimitry Andric     for (const X86FoldTableEntry &Entry : Table2Addr)
1730b57cec5SDimitry Andric       // Index 0, folded load and store, no alignment requirement.
1740b57cec5SDimitry Andric       addTableEntry(Entry, TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE);
1750b57cec5SDimitry Andric 
1765f757f3fSDimitry Andric     for (const X86FoldTableEntry &Entry : Table0)
1770b57cec5SDimitry Andric       // Index 0, mix of loads and stores.
1780b57cec5SDimitry Andric       addTableEntry(Entry, TB_INDEX_0);
1790b57cec5SDimitry Andric 
1805f757f3fSDimitry Andric     for (const X86FoldTableEntry &Entry : Table1)
1810b57cec5SDimitry Andric       // Index 1, folded load
1820b57cec5SDimitry Andric       addTableEntry(Entry, TB_INDEX_1 | TB_FOLDED_LOAD);
1830b57cec5SDimitry Andric 
1845f757f3fSDimitry Andric     for (const X86FoldTableEntry &Entry : Table2)
1850b57cec5SDimitry Andric       // Index 2, folded load
1860b57cec5SDimitry Andric       addTableEntry(Entry, TB_INDEX_2 | TB_FOLDED_LOAD);
1870b57cec5SDimitry Andric 
1885f757f3fSDimitry Andric     for (const X86FoldTableEntry &Entry : Table3)
1890b57cec5SDimitry Andric       // Index 3, folded load
1900b57cec5SDimitry Andric       addTableEntry(Entry, TB_INDEX_3 | TB_FOLDED_LOAD);
1910b57cec5SDimitry Andric 
1925f757f3fSDimitry Andric     for (const X86FoldTableEntry &Entry : Table4)
1930b57cec5SDimitry Andric       // Index 4, folded load
1940b57cec5SDimitry Andric       addTableEntry(Entry, TB_INDEX_4 | TB_FOLDED_LOAD);
1950b57cec5SDimitry Andric 
1968bcb0991SDimitry Andric     // Broadcast tables.
197*0fca6ea1SDimitry Andric     for (const X86FoldTableEntry &Entry : BroadcastTable1)
198*0fca6ea1SDimitry Andric       // Index 1, folded broadcast
199*0fca6ea1SDimitry Andric       addTableEntry(Entry, TB_INDEX_1 | TB_FOLDED_LOAD);
200*0fca6ea1SDimitry Andric 
2015f757f3fSDimitry Andric     for (const X86FoldTableEntry &Entry : BroadcastTable2)
2028bcb0991SDimitry Andric       // Index 2, folded broadcast
203*0fca6ea1SDimitry Andric       addTableEntry(Entry, TB_INDEX_2 | TB_FOLDED_LOAD);
2048bcb0991SDimitry Andric 
2055f757f3fSDimitry Andric     for (const X86FoldTableEntry &Entry : BroadcastTable3)
2065ffd83dbSDimitry Andric       // Index 3, folded broadcast
207*0fca6ea1SDimitry Andric       addTableEntry(Entry, TB_INDEX_3 | TB_FOLDED_LOAD);
2088bcb0991SDimitry Andric 
2095f757f3fSDimitry Andric     for (const X86FoldTableEntry &Entry : BroadcastTable4)
2105f757f3fSDimitry Andric       // Index 4, folded broadcast
211*0fca6ea1SDimitry Andric       addTableEntry(Entry, TB_INDEX_4 | TB_FOLDED_LOAD);
2125f757f3fSDimitry Andric 
2130b57cec5SDimitry Andric     // Sort the memory->reg unfold table.
2140b57cec5SDimitry Andric     array_pod_sort(Table.begin(), Table.end());
2150b57cec5SDimitry Andric 
2160b57cec5SDimitry Andric     // Now that it's sorted, ensure its unique.
2170b57cec5SDimitry Andric     assert(std::adjacent_find(Table.begin(), Table.end()) == Table.end() &&
2180b57cec5SDimitry Andric            "Memory unfolding table is not unique!");
2190b57cec5SDimitry Andric   }
2200b57cec5SDimitry Andric 
221*0fca6ea1SDimitry Andric   void addTableEntry(const X86FoldTableEntry &Entry, uint16_t ExtraFlags) {
2220b57cec5SDimitry Andric     // NOTE: This swaps the KeyOp and DstOp in the table so we can sort it.
2230b57cec5SDimitry Andric     if ((Entry.Flags & TB_NO_REVERSE) == 0)
2240b57cec5SDimitry Andric       Table.push_back({Entry.DstOp, Entry.KeyOp,
2250b57cec5SDimitry Andric                        static_cast<uint16_t>(Entry.Flags | ExtraFlags)});
2260b57cec5SDimitry Andric   }
2270b57cec5SDimitry Andric };
228*0fca6ea1SDimitry Andric } // namespace
2290b57cec5SDimitry Andric 
230*0fca6ea1SDimitry Andric const X86FoldTableEntry *llvm::lookupUnfoldTable(unsigned MemOp) {
231753f127fSDimitry Andric   static X86MemUnfoldTable MemUnfoldTable;
232753f127fSDimitry Andric   auto &Table = MemUnfoldTable.Table;
2330b57cec5SDimitry Andric   auto I = llvm::lower_bound(Table, MemOp);
2340b57cec5SDimitry Andric   if (I != Table.end() && I->KeyOp == MemOp)
2350b57cec5SDimitry Andric     return &*I;
2360b57cec5SDimitry Andric   return nullptr;
2370b57cec5SDimitry Andric }
2380b57cec5SDimitry Andric 
23906c3fb27SDimitry Andric namespace {
24006c3fb27SDimitry Andric 
24106c3fb27SDimitry Andric // This class stores the memory -> broadcast folding tables. It is instantiated
24206c3fb27SDimitry Andric // as a function scope static variable to lazily init the folding table.
2435f757f3fSDimitry Andric struct X86BroadcastFoldTable {
24406c3fb27SDimitry Andric   // Stores memory broadcast folding tables entries sorted by opcode.
2455f757f3fSDimitry Andric   std::vector<X86FoldTableEntry> Table;
24606c3fb27SDimitry Andric 
2475f757f3fSDimitry Andric   X86BroadcastFoldTable() {
24806c3fb27SDimitry Andric     // Broadcast tables.
2495f757f3fSDimitry Andric     for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable2) {
25006c3fb27SDimitry Andric       unsigned RegOp = Reg2Bcst.KeyOp;
25106c3fb27SDimitry Andric       unsigned BcstOp = Reg2Bcst.DstOp;
2525f757f3fSDimitry Andric       if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 2)) {
25306c3fb27SDimitry Andric         unsigned MemOp = Reg2Mem->DstOp;
254*0fca6ea1SDimitry Andric         uint16_t Flags =
255*0fca6ea1SDimitry Andric             Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_2 | TB_FOLDED_LOAD;
25606c3fb27SDimitry Andric         Table.push_back({MemOp, BcstOp, Flags});
25706c3fb27SDimitry Andric       }
25806c3fb27SDimitry Andric     }
2595f757f3fSDimitry Andric     for (const X86FoldTableEntry &Reg2Bcst : BroadcastSizeTable2) {
26006c3fb27SDimitry Andric       unsigned RegOp = Reg2Bcst.KeyOp;
26106c3fb27SDimitry Andric       unsigned BcstOp = Reg2Bcst.DstOp;
2625f757f3fSDimitry Andric       if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 2)) {
26306c3fb27SDimitry Andric         unsigned MemOp = Reg2Mem->DstOp;
264*0fca6ea1SDimitry Andric         uint16_t Flags =
265*0fca6ea1SDimitry Andric             Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_2 | TB_FOLDED_LOAD;
26606c3fb27SDimitry Andric         Table.push_back({MemOp, BcstOp, Flags});
26706c3fb27SDimitry Andric       }
26806c3fb27SDimitry Andric     }
26906c3fb27SDimitry Andric 
2705f757f3fSDimitry Andric     for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable3) {
27106c3fb27SDimitry Andric       unsigned RegOp = Reg2Bcst.KeyOp;
27206c3fb27SDimitry Andric       unsigned BcstOp = Reg2Bcst.DstOp;
2735f757f3fSDimitry Andric       if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 3)) {
27406c3fb27SDimitry Andric         unsigned MemOp = Reg2Mem->DstOp;
275*0fca6ea1SDimitry Andric         uint16_t Flags =
276*0fca6ea1SDimitry Andric             Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_3 | TB_FOLDED_LOAD;
27706c3fb27SDimitry Andric         Table.push_back({MemOp, BcstOp, Flags});
27806c3fb27SDimitry Andric       }
27906c3fb27SDimitry Andric     }
2805f757f3fSDimitry Andric     for (const X86FoldTableEntry &Reg2Bcst : BroadcastSizeTable3) {
28106c3fb27SDimitry Andric       unsigned RegOp = Reg2Bcst.KeyOp;
28206c3fb27SDimitry Andric       unsigned BcstOp = Reg2Bcst.DstOp;
2835f757f3fSDimitry Andric       if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 3)) {
28406c3fb27SDimitry Andric         unsigned MemOp = Reg2Mem->DstOp;
285*0fca6ea1SDimitry Andric         uint16_t Flags =
286*0fca6ea1SDimitry Andric             Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_3 | TB_FOLDED_LOAD;
28706c3fb27SDimitry Andric         Table.push_back({MemOp, BcstOp, Flags});
28806c3fb27SDimitry Andric       }
28906c3fb27SDimitry Andric     }
29006c3fb27SDimitry Andric 
2915f757f3fSDimitry Andric     for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable4) {
2925f757f3fSDimitry Andric       unsigned RegOp = Reg2Bcst.KeyOp;
2935f757f3fSDimitry Andric       unsigned BcstOp = Reg2Bcst.DstOp;
2945f757f3fSDimitry Andric       if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 4)) {
2955f757f3fSDimitry Andric         unsigned MemOp = Reg2Mem->DstOp;
296*0fca6ea1SDimitry Andric         uint16_t Flags =
297*0fca6ea1SDimitry Andric             Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_4 | TB_FOLDED_LOAD;
2985f757f3fSDimitry Andric         Table.push_back({MemOp, BcstOp, Flags});
2995f757f3fSDimitry Andric       }
3005f757f3fSDimitry Andric     }
3015f757f3fSDimitry Andric 
30206c3fb27SDimitry Andric     // Sort the memory->broadcast fold table.
30306c3fb27SDimitry Andric     array_pod_sort(Table.begin(), Table.end());
30406c3fb27SDimitry Andric   }
30506c3fb27SDimitry Andric };
30606c3fb27SDimitry Andric } // namespace
30706c3fb27SDimitry Andric 
308*0fca6ea1SDimitry Andric bool llvm::matchBroadcastSize(const X86FoldTableEntry &Entry,
30906c3fb27SDimitry Andric                               unsigned BroadcastBits) {
31006c3fb27SDimitry Andric   switch (Entry.Flags & TB_BCAST_MASK) {
311b3edf446SDimitry Andric   case TB_BCAST_W:
312b3edf446SDimitry Andric   case TB_BCAST_SH:
313b3edf446SDimitry Andric     return BroadcastBits == 16;
31406c3fb27SDimitry Andric   case TB_BCAST_D:
315b3edf446SDimitry Andric   case TB_BCAST_SS:
31606c3fb27SDimitry Andric     return BroadcastBits == 32;
317b3edf446SDimitry Andric   case TB_BCAST_Q:
318b3edf446SDimitry Andric   case TB_BCAST_SD:
319b3edf446SDimitry Andric     return BroadcastBits == 64;
32006c3fb27SDimitry Andric   }
32106c3fb27SDimitry Andric   return false;
32206c3fb27SDimitry Andric }
32306c3fb27SDimitry Andric 
3245f757f3fSDimitry Andric const X86FoldTableEntry *
325*0fca6ea1SDimitry Andric llvm::lookupBroadcastFoldTableBySize(unsigned MemOp, unsigned BroadcastBits) {
3265f757f3fSDimitry Andric   static X86BroadcastFoldTable BroadcastFoldTable;
3275f757f3fSDimitry Andric   auto &Table = BroadcastFoldTable.Table;
32806c3fb27SDimitry Andric   for (auto I = llvm::lower_bound(Table, MemOp);
32906c3fb27SDimitry Andric        I != Table.end() && I->KeyOp == MemOp; ++I) {
33006c3fb27SDimitry Andric     if (matchBroadcastSize(*I, BroadcastBits))
33106c3fb27SDimitry Andric       return &*I;
33206c3fb27SDimitry Andric   }
33306c3fb27SDimitry Andric   return nullptr;
33406c3fb27SDimitry Andric }
335