//===-- X86InstrFoldTables.cpp - X86 Instruction Folding Tables -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 memory folding tables.
//
//===----------------------------------------------------------------------===//

#include "X86InstrFoldTables.h"
#include "X86InstrInfo.h"
#include "llvm/ADT/STLExtras.h"
#include <atomic>
#include <vector>

using namespace llvm;

// These tables are sorted by their RegOp value, allowing them to be binary
// searched at runtime without the need for additional storage. The enum values
// are currently emitted in X86GenInstrInfo.inc in alphabetical order, which
// makes sorting these tables a simple matter of alphabetizing the table.
#include "X86GenFoldTables.inc"

// Table to map instructions safe to broadcast using a different width from the
// element width.
295f757f3fSDimitry Andric static const X86FoldTableEntry BroadcastSizeTable2[] = { 3006c3fb27SDimitry Andric { X86::VANDNPDZ128rr, X86::VANDNPSZ128rmb, TB_BCAST_SS }, 3106c3fb27SDimitry Andric { X86::VANDNPDZ256rr, X86::VANDNPSZ256rmb, TB_BCAST_SS }, 3206c3fb27SDimitry Andric { X86::VANDNPDZrr, X86::VANDNPSZrmb, TB_BCAST_SS }, 3306c3fb27SDimitry Andric { X86::VANDNPSZ128rr, X86::VANDNPDZ128rmb, TB_BCAST_SD }, 3406c3fb27SDimitry Andric { X86::VANDNPSZ256rr, X86::VANDNPDZ256rmb, TB_BCAST_SD }, 3506c3fb27SDimitry Andric { X86::VANDNPSZrr, X86::VANDNPDZrmb, TB_BCAST_SD }, 3606c3fb27SDimitry Andric { X86::VANDPDZ128rr, X86::VANDPSZ128rmb, TB_BCAST_SS }, 3706c3fb27SDimitry Andric { X86::VANDPDZ256rr, X86::VANDPSZ256rmb, TB_BCAST_SS }, 3806c3fb27SDimitry Andric { X86::VANDPDZrr, X86::VANDPSZrmb, TB_BCAST_SS }, 3906c3fb27SDimitry Andric { X86::VANDPSZ128rr, X86::VANDPDZ128rmb, TB_BCAST_SD }, 4006c3fb27SDimitry Andric { X86::VANDPSZ256rr, X86::VANDPDZ256rmb, TB_BCAST_SD }, 4106c3fb27SDimitry Andric { X86::VANDPSZrr, X86::VANDPDZrmb, TB_BCAST_SD }, 4206c3fb27SDimitry Andric { X86::VORPDZ128rr, X86::VORPSZ128rmb, TB_BCAST_SS }, 4306c3fb27SDimitry Andric { X86::VORPDZ256rr, X86::VORPSZ256rmb, TB_BCAST_SS }, 4406c3fb27SDimitry Andric { X86::VORPDZrr, X86::VORPSZrmb, TB_BCAST_SS }, 4506c3fb27SDimitry Andric { X86::VORPSZ128rr, X86::VORPDZ128rmb, TB_BCAST_SD }, 4606c3fb27SDimitry Andric { X86::VORPSZ256rr, X86::VORPDZ256rmb, TB_BCAST_SD }, 4706c3fb27SDimitry Andric { X86::VORPSZrr, X86::VORPDZrmb, TB_BCAST_SD }, 4806c3fb27SDimitry Andric { X86::VPANDDZ128rr, X86::VPANDQZ128rmb, TB_BCAST_Q }, 4906c3fb27SDimitry Andric { X86::VPANDDZ256rr, X86::VPANDQZ256rmb, TB_BCAST_Q }, 5006c3fb27SDimitry Andric { X86::VPANDDZrr, X86::VPANDQZrmb, TB_BCAST_Q }, 5106c3fb27SDimitry Andric { X86::VPANDNDZ128rr, X86::VPANDNQZ128rmb, TB_BCAST_Q }, 5206c3fb27SDimitry Andric { X86::VPANDNDZ256rr, X86::VPANDNQZ256rmb, TB_BCAST_Q }, 5306c3fb27SDimitry Andric { X86::VPANDNDZrr, X86::VPANDNQZrmb, TB_BCAST_Q 
}, 5406c3fb27SDimitry Andric { X86::VPANDNQZ128rr, X86::VPANDNDZ128rmb, TB_BCAST_D }, 5506c3fb27SDimitry Andric { X86::VPANDNQZ256rr, X86::VPANDNDZ256rmb, TB_BCAST_D }, 5606c3fb27SDimitry Andric { X86::VPANDNQZrr, X86::VPANDNDZrmb, TB_BCAST_D }, 5706c3fb27SDimitry Andric { X86::VPANDQZ128rr, X86::VPANDDZ128rmb, TB_BCAST_D }, 5806c3fb27SDimitry Andric { X86::VPANDQZ256rr, X86::VPANDDZ256rmb, TB_BCAST_D }, 5906c3fb27SDimitry Andric { X86::VPANDQZrr, X86::VPANDDZrmb, TB_BCAST_D }, 6006c3fb27SDimitry Andric { X86::VPORDZ128rr, X86::VPORQZ128rmb, TB_BCAST_Q }, 6106c3fb27SDimitry Andric { X86::VPORDZ256rr, X86::VPORQZ256rmb, TB_BCAST_Q }, 6206c3fb27SDimitry Andric { X86::VPORDZrr, X86::VPORQZrmb, TB_BCAST_Q }, 6306c3fb27SDimitry Andric { X86::VPORQZ128rr, X86::VPORDZ128rmb, TB_BCAST_D }, 6406c3fb27SDimitry Andric { X86::VPORQZ256rr, X86::VPORDZ256rmb, TB_BCAST_D }, 6506c3fb27SDimitry Andric { X86::VPORQZrr, X86::VPORDZrmb, TB_BCAST_D }, 6606c3fb27SDimitry Andric { X86::VPXORDZ128rr, X86::VPXORQZ128rmb, TB_BCAST_Q }, 6706c3fb27SDimitry Andric { X86::VPXORDZ256rr, X86::VPXORQZ256rmb, TB_BCAST_Q }, 6806c3fb27SDimitry Andric { X86::VPXORDZrr, X86::VPXORQZrmb, TB_BCAST_Q }, 6906c3fb27SDimitry Andric { X86::VPXORQZ128rr, X86::VPXORDZ128rmb, TB_BCAST_D }, 7006c3fb27SDimitry Andric { X86::VPXORQZ256rr, X86::VPXORDZ256rmb, TB_BCAST_D }, 7106c3fb27SDimitry Andric { X86::VPXORQZrr, X86::VPXORDZrmb, TB_BCAST_D }, 7206c3fb27SDimitry Andric { X86::VXORPDZ128rr, X86::VXORPSZ128rmb, TB_BCAST_SS }, 7306c3fb27SDimitry Andric { X86::VXORPDZ256rr, X86::VXORPSZ256rmb, TB_BCAST_SS }, 7406c3fb27SDimitry Andric { X86::VXORPDZrr, X86::VXORPSZrmb, TB_BCAST_SS }, 7506c3fb27SDimitry Andric { X86::VXORPSZ128rr, X86::VXORPDZ128rmb, TB_BCAST_SD }, 7606c3fb27SDimitry Andric { X86::VXORPSZ256rr, X86::VXORPDZ256rmb, TB_BCAST_SD }, 7706c3fb27SDimitry Andric { X86::VXORPSZrr, X86::VXORPDZrmb, TB_BCAST_SD }, 7806c3fb27SDimitry Andric }; 7906c3fb27SDimitry Andric 805f757f3fSDimitry Andric static const 
X86FoldTableEntry BroadcastSizeTable3[] = { 8106c3fb27SDimitry Andric { X86::VPTERNLOGDZ128rri, X86::VPTERNLOGQZ128rmbi, TB_BCAST_Q }, 8206c3fb27SDimitry Andric { X86::VPTERNLOGDZ256rri, X86::VPTERNLOGQZ256rmbi, TB_BCAST_Q }, 8306c3fb27SDimitry Andric { X86::VPTERNLOGDZrri, X86::VPTERNLOGQZrmbi, TB_BCAST_Q }, 8406c3fb27SDimitry Andric { X86::VPTERNLOGQZ128rri, X86::VPTERNLOGDZ128rmbi, TB_BCAST_D }, 8506c3fb27SDimitry Andric { X86::VPTERNLOGQZ256rri, X86::VPTERNLOGDZ256rmbi, TB_BCAST_D }, 8606c3fb27SDimitry Andric { X86::VPTERNLOGQZrri, X86::VPTERNLOGDZrmbi, TB_BCAST_D }, 8706c3fb27SDimitry Andric }; 8806c3fb27SDimitry Andric 895f757f3fSDimitry Andric static const X86FoldTableEntry * 905f757f3fSDimitry Andric lookupFoldTableImpl(ArrayRef<X86FoldTableEntry> Table, unsigned RegOp) { 910b57cec5SDimitry Andric #ifndef NDEBUG 925f757f3fSDimitry Andric #define CHECK_SORTED_UNIQUE(TABLE) \ 935f757f3fSDimitry Andric assert(llvm::is_sorted(TABLE) && #TABLE " is not sorted"); \ 945f757f3fSDimitry Andric assert(std::adjacent_find(std::begin(Table), std::end(Table)) == \ 955f757f3fSDimitry Andric std::end(Table) && \ 965f757f3fSDimitry Andric #TABLE " is not unique"); 975f757f3fSDimitry Andric 980b57cec5SDimitry Andric // Make sure the tables are sorted. 
990b57cec5SDimitry Andric static std::atomic<bool> FoldTablesChecked(false); 1000b57cec5SDimitry Andric if (!FoldTablesChecked.load(std::memory_order_relaxed)) { 1015f757f3fSDimitry Andric CHECK_SORTED_UNIQUE(Table2Addr) 1025f757f3fSDimitry Andric CHECK_SORTED_UNIQUE(Table0) 1035f757f3fSDimitry Andric CHECK_SORTED_UNIQUE(Table1) 1045f757f3fSDimitry Andric CHECK_SORTED_UNIQUE(Table2) 1055f757f3fSDimitry Andric CHECK_SORTED_UNIQUE(Table3) 1065f757f3fSDimitry Andric CHECK_SORTED_UNIQUE(Table4) 1075f757f3fSDimitry Andric CHECK_SORTED_UNIQUE(BroadcastTable1) 1085f757f3fSDimitry Andric CHECK_SORTED_UNIQUE(BroadcastTable2) 1095f757f3fSDimitry Andric CHECK_SORTED_UNIQUE(BroadcastTable3) 1105f757f3fSDimitry Andric CHECK_SORTED_UNIQUE(BroadcastTable4) 1115f757f3fSDimitry Andric CHECK_SORTED_UNIQUE(BroadcastSizeTable2) 1125f757f3fSDimitry Andric CHECK_SORTED_UNIQUE(BroadcastSizeTable3) 1130b57cec5SDimitry Andric FoldTablesChecked.store(true, std::memory_order_relaxed); 1140b57cec5SDimitry Andric } 1150b57cec5SDimitry Andric #endif 1160b57cec5SDimitry Andric 1175f757f3fSDimitry Andric const X86FoldTableEntry *Data = llvm::lower_bound(Table, RegOp); 1180b57cec5SDimitry Andric if (Data != Table.end() && Data->KeyOp == RegOp && 1190b57cec5SDimitry Andric !(Data->Flags & TB_NO_FORWARD)) 1200b57cec5SDimitry Andric return Data; 1210b57cec5SDimitry Andric return nullptr; 1220b57cec5SDimitry Andric } 1230b57cec5SDimitry Andric 124*0fca6ea1SDimitry Andric const X86FoldTableEntry *llvm::lookupTwoAddrFoldTable(unsigned RegOp) { 1255f757f3fSDimitry Andric return lookupFoldTableImpl(Table2Addr, RegOp); 1260b57cec5SDimitry Andric } 1270b57cec5SDimitry Andric 128*0fca6ea1SDimitry Andric const X86FoldTableEntry *llvm::lookupFoldTable(unsigned RegOp, unsigned OpNum) { 1295f757f3fSDimitry Andric ArrayRef<X86FoldTableEntry> FoldTable; 1300b57cec5SDimitry Andric if (OpNum == 0) 1315f757f3fSDimitry Andric FoldTable = ArrayRef(Table0); 1320b57cec5SDimitry Andric else if (OpNum == 1) 
1335f757f3fSDimitry Andric FoldTable = ArrayRef(Table1); 1340b57cec5SDimitry Andric else if (OpNum == 2) 1355f757f3fSDimitry Andric FoldTable = ArrayRef(Table2); 1360b57cec5SDimitry Andric else if (OpNum == 3) 1375f757f3fSDimitry Andric FoldTable = ArrayRef(Table3); 1380b57cec5SDimitry Andric else if (OpNum == 4) 1395f757f3fSDimitry Andric FoldTable = ArrayRef(Table4); 1400b57cec5SDimitry Andric else 1410b57cec5SDimitry Andric return nullptr; 1420b57cec5SDimitry Andric 1430b57cec5SDimitry Andric return lookupFoldTableImpl(FoldTable, RegOp); 1440b57cec5SDimitry Andric } 1450b57cec5SDimitry Andric 146*0fca6ea1SDimitry Andric const X86FoldTableEntry *llvm::lookupBroadcastFoldTable(unsigned RegOp, 147*0fca6ea1SDimitry Andric unsigned OpNum) { 148*0fca6ea1SDimitry Andric ArrayRef<X86FoldTableEntry> FoldTable; 149*0fca6ea1SDimitry Andric if (OpNum == 1) 150*0fca6ea1SDimitry Andric FoldTable = ArrayRef(BroadcastTable1); 151*0fca6ea1SDimitry Andric else if (OpNum == 2) 152*0fca6ea1SDimitry Andric FoldTable = ArrayRef(BroadcastTable2); 153*0fca6ea1SDimitry Andric else if (OpNum == 3) 154*0fca6ea1SDimitry Andric FoldTable = ArrayRef(BroadcastTable3); 155*0fca6ea1SDimitry Andric else if (OpNum == 4) 156*0fca6ea1SDimitry Andric FoldTable = ArrayRef(BroadcastTable4); 157*0fca6ea1SDimitry Andric else 158*0fca6ea1SDimitry Andric return nullptr; 159*0fca6ea1SDimitry Andric 160*0fca6ea1SDimitry Andric return lookupFoldTableImpl(FoldTable, RegOp); 161*0fca6ea1SDimitry Andric } 162*0fca6ea1SDimitry Andric 1630b57cec5SDimitry Andric namespace { 1640b57cec5SDimitry Andric 1650b57cec5SDimitry Andric // This class stores the memory unfolding tables. It is instantiated as a 166753f127fSDimitry Andric // function scope static variable to lazily init the unfolding table. 1670b57cec5SDimitry Andric struct X86MemUnfoldTable { 1680b57cec5SDimitry Andric // Stores memory unfolding tables entries sorted by opcode. 
1695f757f3fSDimitry Andric std::vector<X86FoldTableEntry> Table; 1700b57cec5SDimitry Andric 1710b57cec5SDimitry Andric X86MemUnfoldTable() { 1725f757f3fSDimitry Andric for (const X86FoldTableEntry &Entry : Table2Addr) 1730b57cec5SDimitry Andric // Index 0, folded load and store, no alignment requirement. 1740b57cec5SDimitry Andric addTableEntry(Entry, TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE); 1750b57cec5SDimitry Andric 1765f757f3fSDimitry Andric for (const X86FoldTableEntry &Entry : Table0) 1770b57cec5SDimitry Andric // Index 0, mix of loads and stores. 1780b57cec5SDimitry Andric addTableEntry(Entry, TB_INDEX_0); 1790b57cec5SDimitry Andric 1805f757f3fSDimitry Andric for (const X86FoldTableEntry &Entry : Table1) 1810b57cec5SDimitry Andric // Index 1, folded load 1820b57cec5SDimitry Andric addTableEntry(Entry, TB_INDEX_1 | TB_FOLDED_LOAD); 1830b57cec5SDimitry Andric 1845f757f3fSDimitry Andric for (const X86FoldTableEntry &Entry : Table2) 1850b57cec5SDimitry Andric // Index 2, folded load 1860b57cec5SDimitry Andric addTableEntry(Entry, TB_INDEX_2 | TB_FOLDED_LOAD); 1870b57cec5SDimitry Andric 1885f757f3fSDimitry Andric for (const X86FoldTableEntry &Entry : Table3) 1890b57cec5SDimitry Andric // Index 3, folded load 1900b57cec5SDimitry Andric addTableEntry(Entry, TB_INDEX_3 | TB_FOLDED_LOAD); 1910b57cec5SDimitry Andric 1925f757f3fSDimitry Andric for (const X86FoldTableEntry &Entry : Table4) 1930b57cec5SDimitry Andric // Index 4, folded load 1940b57cec5SDimitry Andric addTableEntry(Entry, TB_INDEX_4 | TB_FOLDED_LOAD); 1950b57cec5SDimitry Andric 1968bcb0991SDimitry Andric // Broadcast tables. 
197*0fca6ea1SDimitry Andric for (const X86FoldTableEntry &Entry : BroadcastTable1) 198*0fca6ea1SDimitry Andric // Index 1, folded broadcast 199*0fca6ea1SDimitry Andric addTableEntry(Entry, TB_INDEX_1 | TB_FOLDED_LOAD); 200*0fca6ea1SDimitry Andric 2015f757f3fSDimitry Andric for (const X86FoldTableEntry &Entry : BroadcastTable2) 2028bcb0991SDimitry Andric // Index 2, folded broadcast 203*0fca6ea1SDimitry Andric addTableEntry(Entry, TB_INDEX_2 | TB_FOLDED_LOAD); 2048bcb0991SDimitry Andric 2055f757f3fSDimitry Andric for (const X86FoldTableEntry &Entry : BroadcastTable3) 2065ffd83dbSDimitry Andric // Index 3, folded broadcast 207*0fca6ea1SDimitry Andric addTableEntry(Entry, TB_INDEX_3 | TB_FOLDED_LOAD); 2088bcb0991SDimitry Andric 2095f757f3fSDimitry Andric for (const X86FoldTableEntry &Entry : BroadcastTable4) 2105f757f3fSDimitry Andric // Index 4, folded broadcast 211*0fca6ea1SDimitry Andric addTableEntry(Entry, TB_INDEX_4 | TB_FOLDED_LOAD); 2125f757f3fSDimitry Andric 2130b57cec5SDimitry Andric // Sort the memory->reg unfold table. 2140b57cec5SDimitry Andric array_pod_sort(Table.begin(), Table.end()); 2150b57cec5SDimitry Andric 2160b57cec5SDimitry Andric // Now that it's sorted, ensure its unique. 2170b57cec5SDimitry Andric assert(std::adjacent_find(Table.begin(), Table.end()) == Table.end() && 2180b57cec5SDimitry Andric "Memory unfolding table is not unique!"); 2190b57cec5SDimitry Andric } 2200b57cec5SDimitry Andric 221*0fca6ea1SDimitry Andric void addTableEntry(const X86FoldTableEntry &Entry, uint16_t ExtraFlags) { 2220b57cec5SDimitry Andric // NOTE: This swaps the KeyOp and DstOp in the table so we can sort it. 
2230b57cec5SDimitry Andric if ((Entry.Flags & TB_NO_REVERSE) == 0) 2240b57cec5SDimitry Andric Table.push_back({Entry.DstOp, Entry.KeyOp, 2250b57cec5SDimitry Andric static_cast<uint16_t>(Entry.Flags | ExtraFlags)}); 2260b57cec5SDimitry Andric } 2270b57cec5SDimitry Andric }; 228*0fca6ea1SDimitry Andric } // namespace 2290b57cec5SDimitry Andric 230*0fca6ea1SDimitry Andric const X86FoldTableEntry *llvm::lookupUnfoldTable(unsigned MemOp) { 231753f127fSDimitry Andric static X86MemUnfoldTable MemUnfoldTable; 232753f127fSDimitry Andric auto &Table = MemUnfoldTable.Table; 2330b57cec5SDimitry Andric auto I = llvm::lower_bound(Table, MemOp); 2340b57cec5SDimitry Andric if (I != Table.end() && I->KeyOp == MemOp) 2350b57cec5SDimitry Andric return &*I; 2360b57cec5SDimitry Andric return nullptr; 2370b57cec5SDimitry Andric } 2380b57cec5SDimitry Andric 23906c3fb27SDimitry Andric namespace { 24006c3fb27SDimitry Andric 24106c3fb27SDimitry Andric // This class stores the memory -> broadcast folding tables. It is instantiated 24206c3fb27SDimitry Andric // as a function scope static variable to lazily init the folding table. 2435f757f3fSDimitry Andric struct X86BroadcastFoldTable { 24406c3fb27SDimitry Andric // Stores memory broadcast folding tables entries sorted by opcode. 2455f757f3fSDimitry Andric std::vector<X86FoldTableEntry> Table; 24606c3fb27SDimitry Andric 2475f757f3fSDimitry Andric X86BroadcastFoldTable() { 24806c3fb27SDimitry Andric // Broadcast tables. 
2495f757f3fSDimitry Andric for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable2) { 25006c3fb27SDimitry Andric unsigned RegOp = Reg2Bcst.KeyOp; 25106c3fb27SDimitry Andric unsigned BcstOp = Reg2Bcst.DstOp; 2525f757f3fSDimitry Andric if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 2)) { 25306c3fb27SDimitry Andric unsigned MemOp = Reg2Mem->DstOp; 254*0fca6ea1SDimitry Andric uint16_t Flags = 255*0fca6ea1SDimitry Andric Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_2 | TB_FOLDED_LOAD; 25606c3fb27SDimitry Andric Table.push_back({MemOp, BcstOp, Flags}); 25706c3fb27SDimitry Andric } 25806c3fb27SDimitry Andric } 2595f757f3fSDimitry Andric for (const X86FoldTableEntry &Reg2Bcst : BroadcastSizeTable2) { 26006c3fb27SDimitry Andric unsigned RegOp = Reg2Bcst.KeyOp; 26106c3fb27SDimitry Andric unsigned BcstOp = Reg2Bcst.DstOp; 2625f757f3fSDimitry Andric if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 2)) { 26306c3fb27SDimitry Andric unsigned MemOp = Reg2Mem->DstOp; 264*0fca6ea1SDimitry Andric uint16_t Flags = 265*0fca6ea1SDimitry Andric Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_2 | TB_FOLDED_LOAD; 26606c3fb27SDimitry Andric Table.push_back({MemOp, BcstOp, Flags}); 26706c3fb27SDimitry Andric } 26806c3fb27SDimitry Andric } 26906c3fb27SDimitry Andric 2705f757f3fSDimitry Andric for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable3) { 27106c3fb27SDimitry Andric unsigned RegOp = Reg2Bcst.KeyOp; 27206c3fb27SDimitry Andric unsigned BcstOp = Reg2Bcst.DstOp; 2735f757f3fSDimitry Andric if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 3)) { 27406c3fb27SDimitry Andric unsigned MemOp = Reg2Mem->DstOp; 275*0fca6ea1SDimitry Andric uint16_t Flags = 276*0fca6ea1SDimitry Andric Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_3 | TB_FOLDED_LOAD; 27706c3fb27SDimitry Andric Table.push_back({MemOp, BcstOp, Flags}); 27806c3fb27SDimitry Andric } 27906c3fb27SDimitry Andric } 2805f757f3fSDimitry Andric for (const X86FoldTableEntry &Reg2Bcst : BroadcastSizeTable3) { 
28106c3fb27SDimitry Andric unsigned RegOp = Reg2Bcst.KeyOp; 28206c3fb27SDimitry Andric unsigned BcstOp = Reg2Bcst.DstOp; 2835f757f3fSDimitry Andric if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 3)) { 28406c3fb27SDimitry Andric unsigned MemOp = Reg2Mem->DstOp; 285*0fca6ea1SDimitry Andric uint16_t Flags = 286*0fca6ea1SDimitry Andric Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_3 | TB_FOLDED_LOAD; 28706c3fb27SDimitry Andric Table.push_back({MemOp, BcstOp, Flags}); 28806c3fb27SDimitry Andric } 28906c3fb27SDimitry Andric } 29006c3fb27SDimitry Andric 2915f757f3fSDimitry Andric for (const X86FoldTableEntry &Reg2Bcst : BroadcastTable4) { 2925f757f3fSDimitry Andric unsigned RegOp = Reg2Bcst.KeyOp; 2935f757f3fSDimitry Andric unsigned BcstOp = Reg2Bcst.DstOp; 2945f757f3fSDimitry Andric if (const X86FoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 4)) { 2955f757f3fSDimitry Andric unsigned MemOp = Reg2Mem->DstOp; 296*0fca6ea1SDimitry Andric uint16_t Flags = 297*0fca6ea1SDimitry Andric Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_4 | TB_FOLDED_LOAD; 2985f757f3fSDimitry Andric Table.push_back({MemOp, BcstOp, Flags}); 2995f757f3fSDimitry Andric } 3005f757f3fSDimitry Andric } 3015f757f3fSDimitry Andric 30206c3fb27SDimitry Andric // Sort the memory->broadcast fold table. 
30306c3fb27SDimitry Andric array_pod_sort(Table.begin(), Table.end()); 30406c3fb27SDimitry Andric } 30506c3fb27SDimitry Andric }; 30606c3fb27SDimitry Andric } // namespace 30706c3fb27SDimitry Andric 308*0fca6ea1SDimitry Andric bool llvm::matchBroadcastSize(const X86FoldTableEntry &Entry, 30906c3fb27SDimitry Andric unsigned BroadcastBits) { 31006c3fb27SDimitry Andric switch (Entry.Flags & TB_BCAST_MASK) { 311b3edf446SDimitry Andric case TB_BCAST_W: 312b3edf446SDimitry Andric case TB_BCAST_SH: 313b3edf446SDimitry Andric return BroadcastBits == 16; 31406c3fb27SDimitry Andric case TB_BCAST_D: 315b3edf446SDimitry Andric case TB_BCAST_SS: 31606c3fb27SDimitry Andric return BroadcastBits == 32; 317b3edf446SDimitry Andric case TB_BCAST_Q: 318b3edf446SDimitry Andric case TB_BCAST_SD: 319b3edf446SDimitry Andric return BroadcastBits == 64; 32006c3fb27SDimitry Andric } 32106c3fb27SDimitry Andric return false; 32206c3fb27SDimitry Andric } 32306c3fb27SDimitry Andric 3245f757f3fSDimitry Andric const X86FoldTableEntry * 325*0fca6ea1SDimitry Andric llvm::lookupBroadcastFoldTableBySize(unsigned MemOp, unsigned BroadcastBits) { 3265f757f3fSDimitry Andric static X86BroadcastFoldTable BroadcastFoldTable; 3275f757f3fSDimitry Andric auto &Table = BroadcastFoldTable.Table; 32806c3fb27SDimitry Andric for (auto I = llvm::lower_bound(Table, MemOp); 32906c3fb27SDimitry Andric I != Table.end() && I->KeyOp == MemOp; ++I) { 33006c3fb27SDimitry Andric if (matchBroadcastSize(*I, BroadcastBits)) 33106c3fb27SDimitry Andric return &*I; 33206c3fb27SDimitry Andric } 33306c3fb27SDimitry Andric return nullptr; 33406c3fb27SDimitry Andric } 335