1
0
Fork 0

DBG: improve function analysis and introduce loop analysis

Thanks to DefCon42 for help on the backedge algorithm!
This commit is contained in:
Duncan Ogilvie 2020-06-04 17:17:05 +02:00
parent 5ab09dae92
commit 6f5e786a54
3 changed files with 319 additions and 24 deletions

View File

@ -41,8 +41,8 @@ typedef struct
struct BridgeCFNode
{
duint parentGraph = 0; //function of which this node is a part
duint start = 0; //start of the block
duint end = 0; //end of the block (inclusive)
duint start = 0; //va of the first instruction in the block
duint end = 0; //va of the last instruction in the block (inclusive)
duint brtrue = 0; //destination if condition is true
duint brfalse = 0; //destination if condition is false
duint icount = 0; //number of instructions in node

View File

@ -3,8 +3,12 @@
#include "console.h"
#include "filehelper.h"
#include "function.h"
#include "label.h"
#include "xrefs.h"
#include "plugin_loader.h"
#include "loop.h"
#include <set>
#include <map>
RecursiveAnalysis::RecursiveAnalysis(duint base, duint size, duint entryPoint, bool usePlugins, bool dump)
: Analysis(base, size),
@ -18,34 +22,108 @@ void RecursiveAnalysis::Analyse()
{
// Runs the analysis for the single function starting at mEntryPoint.
// Order matters: analyzeLoops looks the function graph up through GetFunctionGraph
// and returns early when analyzeFunction has not stored a graph for that entry point.
//TODO: implement queue to analyze multiple functions
analyzeFunction(mEntryPoint);
analyzeLoops(mEntryPoint);
}
void RecursiveAnalysis::SetMarkers()
{
if(mDump)
for(const auto & function : mFunctions)
FileHelper::WriteAllText(StringUtils::sprintf("cfgraph_%p.dot", function.entryPoint), GraphToDot(function));
FileHelper::WriteAllText(StringUtils::sprintf("cfgraph_%p.dot", function.second.entryPoint), GraphToDot(function.second));
//set function ranges
for(const auto & function : mFunctions)
for(const auto & functionItr : mFunctions)
{
duint start = ~0;
duint end = 0;
duint icount = 0;
for(const auto & node : function.nodes)
// Split functions with multiple chunks (either due to tail calls or PGO)
// Example: kernelbase:KernelBaseBaseDllInitialize
// This algorithm orders basic blocks and then iterates, growing the chunk downwards
// Function ranges are collected in another ordered map for loop insertion
const auto & function = functionItr.second;
std::map<Range, const BridgeCFNode*, RangeCompare> blockRanges, functionRanges;
for(const auto & nodeItr : function.nodes)
{
if(!inRange(node.second.start))
continue;
icount += node.second.icount;
start = min(node.second.start, start);
end = max(node.second.end, end);
const auto & node = nodeItr.second;
if(!blockRanges.emplace(Range(node.start, node.end), &node).second)
dprintf_untranslated("Overlapping basic block %p-%p, please report a bug!\n", node.start, node.end);
}
XrefDelRange(start, end);
if(!FunctionAdd(start, end, false, icount))
auto addFunction = [&function, &functionRanges](duint start, duint end, duint icount)
{
FunctionDelete(start);
FunctionDelete(end);
FunctionAdd(start, end, false, icount);
FunctionDelRange(start, end, false /* Do not override user-defined functions */);
LoopDeleteRange(start, end); // clear loop range in function
XrefDelRange(start, end); // clear xrefs in function
FunctionAdd(start, end, false, icount, function.entryPoint);
functionRanges.emplace(Range(start, end), nullptr);
};
duint rangeStart = 0, rangeEnd = 0, rangeInstructionCount = 0;
for(auto rangeItr = blockRanges.begin(); rangeItr != blockRanges.end(); ++rangeItr)
{
auto disasmLen = [this](duint addr)
{
if(!mCp.Disassemble(addr, translateAddr(addr)))
return 1;
return mCp.Size();
};
const auto & node = *rangeItr->second;
if(!rangeStart)
{
rangeStart = node.start;
rangeEnd = node.end;
rangeInstructionCount = node.icount;
}
#define ALIGN_UP(Address, Align) (((ULONG_PTR)(Address) + (Align) - 1) & ~((Align) - 1))
else if(ALIGN_UP(rangeEnd + disasmLen(rangeEnd), 16) >= node.start)
{
// Merge the consecutive range
rangeEnd = node.end;
rangeInstructionCount += node.icount;
}
else
{
if(mDump)
dprintf_untranslated("Flush partial range %p-%p\n", rangeStart, rangeEnd);
addFunction(rangeStart, rangeEnd, rangeInstructionCount);
rangeStart = node.start;
rangeEnd = node.end;
rangeInstructionCount = node.icount;
}
}
if(mDump)
dprintf_untranslated("Flush range %p-%p\n", rangeStart, rangeEnd);
addFunction(rangeStart, rangeEnd, rangeInstructionCount);
// Collect loop ranges
const auto & loopInfo = mLoopInfo[function.entryPoint];
std::vector<Range> loopRanges;
for(const auto & backedge : loopInfo.backedges)
{
//dprintf("Backedge %p-%p\n", backedge.first, backedge.second);
auto startBlock = backedge.second; // destination is the start of the potential loop range
auto endBlock = backedge.first; // source is the start of the last block in the potential loop range
auto startFunctionItr = functionRanges.find(Range(startBlock, startBlock));
auto endFunctionItr = functionRanges.find(Range(endBlock, endBlock));
if(startFunctionItr != functionRanges.end() && startFunctionItr == endFunctionItr)
{
// Loop ranges can only be in the same function chunk range, otherwise they won't insert/display properly
const auto & endBlockNode = function.nodes.at(endBlock);
loopRanges.emplace_back(startBlock, endBlockNode.end);
}
}
// Order loop ranges by start address so the outermost loop is inserted first
std::sort(loopRanges.begin(), loopRanges.end());
for(const auto & loopRange : loopRanges)
{
if(mDump)
dprintf_untranslated("Loop %p-%p\n", loopRange.first, loopRange.second);
duint loopInstructionCount = 0;
auto blockItr = blockRanges.find(loopRange);
do
{
loopInstructionCount += blockItr->second->icount;
}
while(loopRange.second < blockItr->second->end);
LoopAdd(loopRange.first, loopRange.second, false, loopInstructionCount);
}
}
@ -255,5 +333,213 @@ void RecursiveAnalysis::analyzeFunction(duint entryPoint)
plugincbcall(CB_ANALYZE, &info);
graph = BridgeCFGraph(&info.graph, true);
}
mFunctions.push_back(graph);
mFunctions.emplace(entryPoint, graph);
}
// Collects loop information for the function whose graph is stored under entryPoint.
// The result is written to mLoopInfo[entryPoint] (any previous result is replaced):
// - trivialLoops: blocks that list themselves as one of their exits
// - backedges: (source block, destination block) edges that jump back to a block
//   on the path that was recorded when the source block was queued
// Does nothing when no graph is known for entryPoint.
void RecursiveAnalysis::analyzeLoops(duint entryPoint)
{
    auto graph = GetFunctionGraph(entryPoint);
    if(!graph)
        return;

    auto & loopInfo = mLoopInfo[entryPoint] = LoopInfo();
    loopInfo.functionEntry = entryPoint;

    // Detect loops to the same basic block
    for(const auto & node : graph->nodes)
        for(duint exit : node.second.exits)
            if(exit == node.first)
                loopInfo.trivialLoops.insert(node.first);

    // Thanks to DefCon42 for help with the algorithm!
    // Iterative depth-first walk. state[block] holds the path (entry ... block) that was
    // current when the block was last queued; an exit that was already visited and that
    // appears on the path of the current block closes a cycle and is recorded as backedge.
    std::vector<duint> stack;
    stack.push_back(entryPoint);
    std::set<duint> visited;
    std::map<duint, std::vector<duint>> state;
    while(!stack.empty())
    {
        auto start = stack.back();
        stack.pop_back();
        if(!visited.insert(start).second) //already visited
            continue;

        // An exit (or the entry point itself) may not have a node in this graph.
        // Skip it instead of letting nodes.at() throw std::out_of_range.
        auto nodeItr = graph->nodes.find(start);
        if(nodeItr == graph->nodes.end())
            continue;

        // References into a std::map stay valid while other keys are inserted below.
        auto & path = state[start];
        path.push_back(start);
        for(duint exit : nodeItr->second.exits)
        {
            if(!visited.count(exit))
            {
                // exit != start here, because start was marked visited above
                state[exit] = path;
                stack.push_back(exit);
            }
            else if(std::find(path.begin(), path.end(), exit) != path.end())
            {
                loopInfo.backedges.emplace(start, exit);
            }
        }
    }
}
// Experimental (WIP) immediate dominator computation for the function graph stored
// under entryPoint, following the simple variant of the Lengauer-Tarjan algorithm.
// Blocks are renumbered 1..N (0 means "none"), the intermediate tables are printed
// to the log and every block gets a "block<index>" label.
// Does nothing when no graph is known for entryPoint or when the entry point is not
// a block of that graph.
void RecursiveAnalysis::dominatorAnalysis(duint entryPoint)
{
    auto graph = GetFunctionGraph(entryPoint);
    if(!graph)
        return;

    // WIP algo
    // http://jgaa.info/accepted/2006/GeorgiadisTarjanWerneck2006.10.1.pdf
    // https://www.cs.princeton.edu/courses/archive/fall03/cs528/handouts/a%20fast%20algorithm%20for%20finding.pdf
    std::map<duint, duint> parent, anchestor, vertex;
    std::map<duint, duint> label, semi;
    std::map<duint, std::set<duint>> pred, bucket;
    std::map<duint, duint> dom;

    // Map block addresses to dense 1-based indices and build the successor sets
    std::map<duint, duint> indexToAddress, addressToIndex;
    std::map<duint, std::set<duint>> succ;
    {
        size_t curIndex = 1;
        for(const auto & node : graph->nodes)
        {
            indexToAddress[curIndex] = node.first;
            addressToIndex[node.first] = curIndex;
            curIndex++;
        }
        for(const auto & node : graph->nodes)
        {
            auto & s = succ[addressToIndex.at(node.first)];
            for(duint exit : node.second.exits)
            {
                // Exits without a block in this graph have no index. Looking them up with
                // operator[] would silently add index 0 and make the .at() calls below throw.
                auto exitItr = addressToIndex.find(exit);
                if(exitItr != addressToIndex.end())
                    s.insert(exitItr->second);
            }
        }
    }

    auto rootItr = addressToIndex.find(entryPoint);
    if(rootItr == addressToIndex.end())
        return; // the entry point is not a block of this graph
    const duint r = rootItr->second;
    duint n = 0;

    // Depth-first numbering: semi[v] starts as the DFS number of v, vertex[] is the inverse
    std::function<void(duint)> dfs = [&](duint v)
    {
        semi.at(v) = (n = n + 1);
        vertex.at(n) = label.at(v) = v;
        anchestor.at(v) = 0;
        for(duint w : succ.at(v))
        {
            if(semi.at(w) == 0)
            {
                parent.at(w) = v;
                dfs(w);
            }
            pred.at(w).insert(v);
        }
    };

    // Path compression in the link/eval forest
    std::function<void(duint)> compress = [&](duint v)
    {
        if(anchestor.at(anchestor.at(v)) != 0)
        {
            compress(anchestor.at(v));
            if(semi.at(label.at(anchestor.at(v))) < semi.at(label.at(v)))
                label.at(v) = label.at(anchestor.at(v));
            anchestor.at(v) = anchestor.at(anchestor.at(v));
        }
    };

    auto eval = [&](duint v)
    {
        if(anchestor.at(v) == 0)
        {
            return v;
        }
        else
        {
            compress(v);
            return label.at(v);
        }
    };

    auto link = [&](duint v, duint w)
    {
        anchestor.at(w) = v;
    };

    auto print = [](const char* name, const std::map<duint, duint> & m)
    {
        dprintf("%s:\n", name);
        for(const auto & e : m)
            dprintf(" %s[%p] = %p\n", name, e.first, e.second);
    };

    print("indexToAddress", indexToAddress);
    print("addressToIndex", addressToIndex);

    // step1
    for(duint i = 0; i < succ.size(); i++)
    {
        auto v = i + 1;
        pred[v].clear();
        bucket[v].clear();
        semi[v] = vertex[v] = anchestor[v] = label[v] = parent[v] = dom[v] = 0;
        dprintf("%d\n", v);
    }
    n = 0;
    dfs(r);

    print("semi", semi);
    print("vertex", vertex);
    print("label", label);
    print("anchestor", anchestor);

    // Process the vertices in decreasing DFS order, the root (number 1) is excluded
    for(duint i = n; i >= 2; i--)
    {
        auto w = vertex.at(i);

        // step2: compute the semidominator of w from all of its predecessors
        for(duint v : pred.at(w))
        {
            auto u = eval(v);
            if(semi.at(u) < semi.at(w))
            {
                semi[w] = semi.at(u);
            }
        }
        // Only once semi[w] is final: queue w at its semidominator and link it into the
        // forest. Doing this per predecessor would use intermediate semi values and would
        // let later eval() calls compress through w too early.
        bucket.at(vertex.at(semi.at(w))).insert(w);
        const duint parentw = parent.at(w);
        link(parentw, w);

        // step3: implicitly define the immediate dominators of the vertices queued at parent(w)
        auto & bp = bucket.at(parentw);
        for(auto itr = bp.begin(); itr != bp.end(); itr = bp.erase(itr))
        {
            auto v = *itr;
            auto u = eval(v);
            if(semi.at(u) < semi.at(v))
            {
                dom.at(v) = u;
            }
            else
            {
                dom.at(v) = parentw;
            }
        }
    }

    // step4: finalize the immediate dominators in increasing DFS order (2..n inclusive)
    for(duint i = 2; i <= n; i++)
    {
        auto w = vertex.at(i);
        if(dom.at(w) != vertex.at(semi.at(w)))
        {
            dom[w] = dom[dom[w]];
        }
    }
    dom.at(r) = 0;

    print("semi", semi);

    //succ(v)
    for(const auto & d : dom)
    {
        dprintf("dom[%d] = %d\n", d.first, d.second);
    }

    for(const auto & x : indexToAddress)
    {
        char text[256];
        sprintf_s(text, "block%p", x.first);
        LabelSet(x.second, text, false, true);
    }
}

View File

@ -71,15 +71,13 @@ public:
// Returns the control flow graph stored in mFunctions for the function whose entry
// point is `entry`, or nullptr when no such function is stored.
// The returned pointer refers to the element inside mFunctions.
const CFGraph* GetFunctionGraph(duint entry) const
{
// NOTE(review): a linear scan and a keyed find are both present here; this looks like the
// removed and the added side of a diff shown together - confirm which one is live.
for(const auto & function : mFunctions)
if(function.entryPoint == entry)
return &function;
return nullptr;
auto itr = mFunctions.find(entry);
return itr == mFunctions.end() ? nullptr : &itr->second;
}
protected:
duint mEntryPoint;
std::vector<CFGraph> mFunctions;
std::unordered_map<duint, CFGraph> mFunctions;
private:
bool mUsePlugins;
@ -93,5 +91,16 @@ private:
std::vector<XREF> mXrefs;
// Loop analysis results for one function, filled in by analyzeLoops.
struct LoopInfo
{
duint functionEntry = 0; // entry point the analysis was started from
std::unordered_set<duint> trivialLoops; // blocks (by node key) that have themselves as an exit
std::unordered_map<duint, duint> backedges; // backedges in the CFG: source block -> destination block (one destination is kept per source)
};
std::unordered_map<duint, LoopInfo> mLoopInfo;
void analyzeFunction(duint entryPoint);
void analyzeLoops(duint entryPoint);
void dominatorAnalysis(duint entryPoint);
};