DBG: improve function analysis and introduce loop analysis
Thanks to DefCon42 for help on the backedge algorithm!
This commit is contained in:
parent
5ab09dae92
commit
6f5e786a54
|
@ -41,8 +41,8 @@ typedef struct
|
|||
struct BridgeCFNode
|
||||
{
|
||||
duint parentGraph = 0; //function of which this node is a part
|
||||
duint start = 0; //start of the block
|
||||
duint end = 0; //end of the block (inclusive)
|
||||
duint start = 0; //va of the first instruction in the block
|
||||
duint end = 0; //va of the last instruction in the block (inclusive)
|
||||
duint brtrue = 0; //destination if condition is true
|
||||
duint brfalse = 0; //destination if condition is false
|
||||
duint icount = 0; //number of instructions in node
|
||||
|
|
|
@ -3,8 +3,12 @@
|
|||
#include "console.h"
|
||||
#include "filehelper.h"
|
||||
#include "function.h"
|
||||
#include "label.h"
|
||||
#include "xrefs.h"
|
||||
#include "plugin_loader.h"
|
||||
#include "loop.h"
|
||||
#include <set>
|
||||
#include <map>
|
||||
|
||||
RecursiveAnalysis::RecursiveAnalysis(duint base, duint size, duint entryPoint, bool usePlugins, bool dump)
|
||||
: Analysis(base, size),
|
||||
|
@ -18,34 +22,108 @@ void RecursiveAnalysis::Analyse()
|
|||
{
|
||||
//TODO: implement queue to analyze multiple functions
|
||||
analyzeFunction(mEntryPoint);
|
||||
analyzeLoops(mEntryPoint);
|
||||
}
|
||||
|
||||
void RecursiveAnalysis::SetMarkers()
|
||||
{
|
||||
if(mDump)
|
||||
for(const auto & function : mFunctions)
|
||||
FileHelper::WriteAllText(StringUtils::sprintf("cfgraph_%p.dot", function.entryPoint), GraphToDot(function));
|
||||
FileHelper::WriteAllText(StringUtils::sprintf("cfgraph_%p.dot", function.second.entryPoint), GraphToDot(function.second));
|
||||
|
||||
//set function ranges
|
||||
for(const auto & function : mFunctions)
|
||||
for(const auto & functionItr : mFunctions)
|
||||
{
|
||||
duint start = ~0;
|
||||
duint end = 0;
|
||||
duint icount = 0;
|
||||
for(const auto & node : function.nodes)
|
||||
// Split functions with multiple chunks (either due to tail calls or PGO)
|
||||
// Example: kernelbase:KernelBaseBaseDllInitialize
|
||||
// This algorithm orders basic blocks and then iterates, growing the chunk downwards
|
||||
// Function ranges are collected in another ordered map for loop insertion
|
||||
const auto & function = functionItr.second;
|
||||
std::map<Range, const BridgeCFNode*, RangeCompare> blockRanges, functionRanges;
|
||||
for(const auto & nodeItr : function.nodes)
|
||||
{
|
||||
if(!inRange(node.second.start))
|
||||
continue;
|
||||
icount += node.second.icount;
|
||||
start = min(node.second.start, start);
|
||||
end = max(node.second.end, end);
|
||||
const auto & node = nodeItr.second;
|
||||
if(!blockRanges.emplace(Range(node.start, node.end), &node).second)
|
||||
dprintf_untranslated("Overlapping basic block %p-%p, please report a bug!\n", node.start, node.end);
|
||||
}
|
||||
XrefDelRange(start, end);
|
||||
if(!FunctionAdd(start, end, false, icount))
|
||||
|
||||
auto addFunction = [&function, &functionRanges](duint start, duint end, duint icount)
|
||||
{
|
||||
FunctionDelete(start);
|
||||
FunctionDelete(end);
|
||||
FunctionAdd(start, end, false, icount);
|
||||
FunctionDelRange(start, end, false /* Do not override user-defined functions */);
|
||||
LoopDeleteRange(start, end); // clear loop range in function
|
||||
XrefDelRange(start, end); // clear xrefs in function
|
||||
FunctionAdd(start, end, false, icount, function.entryPoint);
|
||||
functionRanges.emplace(Range(start, end), nullptr);
|
||||
};
|
||||
|
||||
duint rangeStart = 0, rangeEnd = 0, rangeInstructionCount = 0;
|
||||
for(auto rangeItr = blockRanges.begin(); rangeItr != blockRanges.end(); ++rangeItr)
|
||||
{
|
||||
auto disasmLen = [this](duint addr)
|
||||
{
|
||||
if(!mCp.Disassemble(addr, translateAddr(addr)))
|
||||
return 1;
|
||||
return mCp.Size();
|
||||
};
|
||||
const auto & node = *rangeItr->second;
|
||||
if(!rangeStart)
|
||||
{
|
||||
rangeStart = node.start;
|
||||
rangeEnd = node.end;
|
||||
rangeInstructionCount = node.icount;
|
||||
}
|
||||
#define ALIGN_UP(Address, Align) (((ULONG_PTR)(Address) + (Align) - 1) & ~((Align) - 1))
|
||||
else if(ALIGN_UP(rangeEnd + disasmLen(rangeEnd), 16) >= node.start)
|
||||
{
|
||||
// Merge the consecutive range
|
||||
rangeEnd = node.end;
|
||||
rangeInstructionCount += node.icount;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(mDump)
|
||||
dprintf_untranslated("Flush partial range %p-%p\n", rangeStart, rangeEnd);
|
||||
addFunction(rangeStart, rangeEnd, rangeInstructionCount);
|
||||
rangeStart = node.start;
|
||||
rangeEnd = node.end;
|
||||
rangeInstructionCount = node.icount;
|
||||
}
|
||||
}
|
||||
if(mDump)
|
||||
dprintf_untranslated("Flush range %p-%p\n", rangeStart, rangeEnd);
|
||||
addFunction(rangeStart, rangeEnd, rangeInstructionCount);
|
||||
|
||||
// Collect loop ranges
|
||||
const auto & loopInfo = mLoopInfo[function.entryPoint];
|
||||
std::vector<Range> loopRanges;
|
||||
for(const auto & backedge : loopInfo.backedges)
|
||||
{
|
||||
//dprintf("Backedge %p-%p\n", backedge.first, backedge.second);
|
||||
auto startBlock = backedge.second; // destination is the start of the potential loop range
|
||||
auto endBlock = backedge.first; // source is the start of the last block in the potential loop range
|
||||
auto startFunctionItr = functionRanges.find(Range(startBlock, startBlock));
|
||||
auto endFunctionItr = functionRanges.find(Range(endBlock, endBlock));
|
||||
if(startFunctionItr != functionRanges.end() && startFunctionItr == endFunctionItr)
|
||||
{
|
||||
// Loop ranges can only be in the same function chunk range, otherwise they won't insert/display properly
|
||||
const auto & endBlockNode = function.nodes.at(endBlock);
|
||||
loopRanges.emplace_back(startBlock, endBlockNode.end);
|
||||
}
|
||||
}
|
||||
// Order loop ranges by start address so the outermost loop is inserted first
|
||||
std::sort(loopRanges.begin(), loopRanges.end());
|
||||
for(const auto & loopRange : loopRanges)
|
||||
{
|
||||
if(mDump)
|
||||
dprintf_untranslated("Loop %p-%p\n", loopRange.first, loopRange.second);
|
||||
duint loopInstructionCount = 0;
|
||||
auto blockItr = blockRanges.find(loopRange);
|
||||
do
|
||||
{
|
||||
loopInstructionCount += blockItr->second->icount;
|
||||
}
|
||||
while(loopRange.second < blockItr->second->end);
|
||||
LoopAdd(loopRange.first, loopRange.second, false, loopInstructionCount);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -255,5 +333,213 @@ void RecursiveAnalysis::analyzeFunction(duint entryPoint)
|
|||
plugincbcall(CB_ANALYZE, &info);
|
||||
graph = BridgeCFGraph(&info.graph, true);
|
||||
}
|
||||
mFunctions.push_back(graph);
|
||||
mFunctions.emplace(entryPoint, graph);
|
||||
}
|
||||
|
||||
// Detects loops in the control flow graph of the function at entryPoint and
// stores the result in mLoopInfo[entryPoint] (any previous result is replaced).
//  - trivialLoops receives every block that lists itself as one of its exits.
//  - backedges receives a (source block, destination block) pair for every edge
//    whose destination lies on the path that was walked from the entry point
//    to the source block.
// Does nothing when no graph is known for entryPoint.
void RecursiveAnalysis::analyzeLoops(duint entryPoint)
{
    auto graph = GetFunctionGraph(entryPoint);
    if(!graph)
        return;

    auto & loopInfo = mLoopInfo[entryPoint] = LoopInfo();
    loopInfo.functionEntry = entryPoint;

    // Detect loops to the same basic block
    for(const auto & node : graph->nodes)
        for(duint exit : node.second.exits)
            if(exit == node.first)
                loopInfo.trivialLoops.insert(node.first);

    // Thanks to DefCon42 for help with the algorithm!
    // Iterative depth-first walk. state[block] holds the path (list of block
    // addresses) that led to the block; an edge to an already visited block
    // that is on the current path is a backedge.
    std::vector<duint> stack;
    stack.push_back(entryPoint);
    std::set<duint> visited;
    std::map<duint, std::vector<duint>> state;

    while(!stack.empty())
    {
        auto start = stack.back();
        stack.pop_back();
        if(!visited.insert(start).second) //already visited
            continue;

        // Skip addresses that have no node in this graph instead of letting
        // nodes.at() throw std::out_of_range.
        // NOTE(review): presumably this happens for exits that leave the
        // analysed range - confirm against analyzeFunction.
        auto nodeItr = graph->nodes.find(start);
        if(nodeItr == graph->nodes.end())
            continue;

        // std::map references stay valid while other keys are inserted below
        auto & path = state[start];
        path.push_back(start);

        for(duint exit : nodeItr->second.exits)
        {
            if(!visited.count(exit))
            {
                // The most recent push is popped first, so the path stored
                // here is the one the exit will actually be visited with.
                state[exit] = path;
                stack.push_back(exit);
            }
            else if(std::find(path.begin(), path.end(), exit) != path.end())
            {
                loopInfo.backedges.emplace(start, exit);
            }
        }
    }
}
|
||||
|
||||
// Work-in-progress dominator computation for the function at entryPoint,
// following the simple version of the Lengauer-Tarjan algorithm (see the links
// below). Basic blocks are numbered 1..N in the iteration order of the graph's
// node container; index 0 means "none".
// The result is not stored anywhere yet: the intermediate tables and dom[] are
// printed with dprintf and every block gets a label "block<index>".
// Does nothing when no graph is known for entryPoint or when the entry point
// has no node in that graph.
void RecursiveAnalysis::dominatorAnalysis(duint entryPoint)
{
    auto graph = GetFunctionGraph(entryPoint);
    if(!graph)
        return;

    // WIP algo

    // http://jgaa.info/accepted/2006/GeorgiadisTarjanWerneck2006.10.1.pdf
    // https://www.cs.princeton.edu/courses/archive/fall03/cs528/handouts/a%20fast%20algorithm%20for%20finding.pdf
    std::map<duint, duint> parent, anchestor, vertex;
    std::map<duint, duint> label, semi;
    std::map<duint, std::set<duint>> pred, bucket;
    std::map<duint, duint> dom;

    std::map<duint, duint> indexToAddress, addressToIndex;
    std::map<duint, std::set<duint>> succ;
    {
        // Number the blocks starting at 1 so that 0 can act as the null vertex
        size_t curIndex = 1;
        for(const auto & node : graph->nodes)
        {
            indexToAddress[curIndex] = node.first;
            addressToIndex[node.first] = curIndex;
            curIndex++;
        }
        // Translate the exits of every block to successor indices
        for(const auto & node : graph->nodes)
        {
            auto & s = succ[addressToIndex.at(node.first)];
            for(duint exit : node.second.exits)
            {
                // Ignore exits without a node in this graph. operator[] would
                // silently create index 0 for them, which is not a vertex and
                // makes the at() lookups below throw.
                auto found = addressToIndex.find(exit);
                if(found != addressToIndex.end())
                    s.insert(found->second);
            }
        }
    }

    auto entryItr = addressToIndex.find(entryPoint);
    if(entryItr == addressToIndex.end())
        return; // entry point is not a block of this graph
    duint r = entryItr->second;
    duint n = 0;

    // Depth-first numbering: semi(v) starts out as the DFS number of v,
    // vertex(i) is the block with DFS number i, parent(w) is the DFS tree parent
    // and pred(w) collects all predecessors of w.
    std::function<void(duint)> dfs = [&](duint v)
    {
        semi.at(v) = (n = n + 1);
        vertex.at(n) = label.at(v) = v;
        anchestor.at(v) = 0;
        for(duint w : succ.at(v))
        {
            if(semi.at(w) == 0)
            {
                parent.at(w) = v;
                dfs(w);
            }
            pred.at(w).insert(v);
        }
    };

    // Path compression in the forest built by link()
    std::function<void(duint)> compress = [&](duint v)
    {
        if(anchestor.at(anchestor.at(v)) != 0)
        {
            compress(anchestor.at(v));
            if(semi.at(label.at(anchestor.at(v))) < semi.at(label.at(v)))
                label.at(v) = label.at(anchestor.at(v));
            anchestor.at(v) = anchestor.at(anchestor.at(v));
        }
    };

    // Returns v when v is a root of the forest, otherwise the vertex with the
    // minimal semi on the path from the root (exclusive) to v.
    auto eval = [&](duint v) -> duint
    {
        if(anchestor.at(v) == 0)
            return v;
        compress(v);
        return label.at(v);
    };

    // Adds the edge (v, w) to the forest
    auto link = [&](duint v, duint w)
    {
        anchestor.at(w) = v;
    };

    auto print = [](const char* name, const std::map<duint, duint> & m)
    {
        dprintf("%s:\n", name);
        for(const auto & e : m)
            dprintf(" %s[%p] = %p\n", name, e.first, e.second);
    };

    print("indexToAddress", indexToAddress);
    print("addressToIndex", addressToIndex);

    // step1
    for(duint i = 0; i < succ.size(); i++)
    {
        auto v = i + 1;
        pred[v] = bucket[v];
        semi[v] = vertex[v] = anchestor[v] = label[v] = parent[v] = dom[v] = 0;
        dprintf("%d\n", v);
    }
    n = 0;
    dfs(r);

    print("semi", semi);
    print("vertex", vertex);
    print("label", label);
    print("anchestor", anchestor);

    // Process the vertices in decreasing DFS order, the root (number 1) excluded
    for(duint i = n; i >= 2; i--)
    {
        auto w = vertex.at(i);
        // step2: compute the semidominator of w from all of its predecessors
        for(duint v : pred.at(w))
        {
            auto u = eval(v);
            if(semi.at(u) < semi.at(w))
            {
                semi[w] = semi.at(u);
            }
        }
        // Only once semi(w) is final may w be put in the bucket of its
        // semidominator and linked to its parent; doing this per predecessor
        // files w under intermediate values and changes later eval() results.
        bucket.at(vertex.at(semi.at(w))).insert(w);
        duint parentw = parent.at(w);
        link(parentw, w);
        // step3: implicitly define the immediate dominator of every vertex in
        // the bucket of parent(w), emptying the bucket along the way
        auto & bp = bucket.at(parentw);
        for(auto itr = bp.begin(); itr != bp.end(); itr = bp.erase(itr))
        {
            auto v = *itr;
            auto u = eval(v);
            if(semi.at(u) < semi.at(v))
            {
                dom.at(v) = u;
            }
            else
            {
                dom.at(v) = parentw;
            }
        }
    }
    // step4: fix up the dominators that were only defined relative to another
    // vertex; runs over the DFS numbers 2..n inclusive
    for(duint i = 2; i <= n; i++)
    {
        auto w = vertex.at(i);
        if(dom.at(w) != vertex.at(semi.at(w)))
        {
            dom[w] = dom[dom[w]];
        }
    }
    dom.at(r) = 0;
    print("semi", semi);
    //succ(v)
    for(const auto & d : dom)
    {
        dprintf("dom[%d] = %d\n", d.first, d.second);
    }

    // Label every block with its index so the printed tables can be matched
    // to addresses
    for(const auto & x : indexToAddress)
    {
        char blockLabel[256];
        sprintf_s(blockLabel, "block%p", x.first);
        LabelSet(x.second, blockLabel, false, true);
    }
}
|
||||
|
|
|
@ -71,15 +71,13 @@ public:
|
|||
|
||||
// Returns the control flow graph stored for the function whose entry point is
// `entry`, or nullptr when mFunctions has no entry for that address.
// mFunctions is keyed by entry point, so this is a direct lookup instead of a
// scan over all analysed functions.
const CFGraph* GetFunctionGraph(duint entry) const
{
    auto itr = mFunctions.find(entry);
    return itr == mFunctions.end() ? nullptr : &itr->second;
}
|
||||
|
||||
protected:
|
||||
duint mEntryPoint;
|
||||
std::vector<CFGraph> mFunctions;
|
||||
std::unordered_map<duint, CFGraph> mFunctions;
|
||||
|
||||
private:
|
||||
bool mUsePlugins;
|
||||
|
@ -93,5 +91,16 @@ private:
|
|||
|
||||
std::vector<XREF> mXrefs;
|
||||
|
||||
struct LoopInfo
|
||||
{
|
||||
duint functionEntry = 0;
|
||||
std::unordered_set<duint> trivialLoops; // loops to the same basic block
|
||||
std::unordered_map<duint, duint> backedges; // backedges in the CFG
|
||||
};
|
||||
|
||||
std::unordered_map<duint, LoopInfo> mLoopInfo;
|
||||
|
||||
void analyzeFunction(duint entryPoint);
|
||||
void analyzeLoops(duint entryPoint);
|
||||
void dominatorAnalysis(duint entryPoint);
|
||||
};
|
Loading…
Reference in New Issue