Skip to content

Commit 3f986bd

Browse files
committed
C++: Add alert provenance plumbing.
1 parent b873343 commit 3f986bd

File tree

6 files changed

+129
-106
lines changed

6 files changed

+129
-106
lines changed

cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowPrivate.qll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,10 @@ predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) { no
282282
/** Extra data-flow steps needed for lambda flow analysis. */
283283
predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preservesValue) { none() }
284284

285+
/**
 * Stub: the body is `none()`, so this predicate never holds for any `source`/`model` pair.
 *
 * NOTE(review): judging by the name and the commit title ("alert provenance plumbing"),
 * this is meant to relate a flow source to the model that defines it — confirm against
 * the shared data-flow library that consumes it.
 */
predicate knownSourceModel(Node source, string model) { none() }
286+
287+
/**
 * Stub: the body is `none()`, so this predicate never holds for any `sink`/`model` pair.
 *
 * NOTE(review): judging by the name and the commit title ("alert provenance plumbing"),
 * this is meant to relate a flow sink to the model that defines it — confirm against
 * the shared data-flow library that consumes it.
 */
predicate knownSinkModel(Node sink, string model) { none() }
288+
285289
/**
286290
* Holds if flow is allowed to pass from parameter `p` and back to itself as a
287291
* side-effect, resulting in a summary from `p` to itself.

cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowUtil.qll

Lines changed: 62 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -514,7 +514,7 @@ private module ThisFlow {
514514
*/
515515
cached
516516
predicate localFlowStep(Node nodeFrom, Node nodeTo) {
517-
simpleLocalFlowStep(nodeFrom, nodeTo)
517+
simpleLocalFlowStep(nodeFrom, nodeTo, _)
518518
or
519519
// Field flow is not strictly a "step" but covers the whole function
520520
// transitively. There's no way to get a step-like relation out of the global
@@ -528,64 +528,67 @@ predicate localFlowStep(Node nodeFrom, Node nodeTo) {
528528
* This is the local flow predicate that's used as a building block in global
529529
* data flow. It may have less flow than the `localFlowStep` predicate.
530530
*/
531-
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
532-
// Expr -> Expr
533-
exprToExprStep_nocfg(nodeFrom.asExpr(), nodeTo.asExpr())
534-
or
535-
// Assignment -> LValue post-update node
536-
//
537-
// This is used for assignments whose left-hand side is not a variable
538-
// assignment or a storeStep but is still modeled by other means. It could be
539-
// a call to `operator*` or `operator[]` where taint should flow to the
540-
// post-update node of the qualifier.
541-
exists(AssignExpr assign |
542-
nodeFrom.asExpr() = assign and
543-
nodeTo.(PostUpdateNode).getPreUpdateNode().asExpr() = assign.getLValue()
544-
)
545-
or
546-
// Node -> FlowVar -> VariableAccess
547-
exists(FlowVar var |
548-
(
549-
exprToVarStep(nodeFrom.asExpr(), var)
550-
or
551-
varSourceBaseCase(var, nodeFrom.asParameter())
552-
or
553-
varSourceBaseCase(var, nodeFrom.asUninitialized())
554-
or
555-
var.definedPartiallyAt(nodeFrom.asPartialDefinition())
556-
) and
557-
varToNodeStep(var, nodeTo)
558-
)
559-
or
560-
// Expr -> DefinitionByReferenceNode
561-
exprToDefinitionByReferenceStep(nodeFrom.asExpr(), nodeTo.asDefiningArgument())
562-
or
563-
// `this` -> adjacent-`this`
564-
ThisFlow::adjacentThisRefs(nodeFrom, nodeTo)
565-
or
566-
// post-update-`this` -> following-`this`-ref
567-
ThisFlow::adjacentThisRefs(nodeFrom.(PostUpdateNode).getPreUpdateNode(), nodeTo)
568-
or
569-
// In `f(&x->a)`, this step provides the flow from post-`&` to post-`x->a`,
570-
// from which there is field flow to `x` via reverse read.
571-
exists(PartialDefinition def, Expr inner, Expr outer |
572-
def.definesExpressions(inner, outer) and
573-
inner = nodeTo.(InnerPartialDefinitionNode).getPreUpdateNode().asExpr() and
574-
outer = nodeFrom.(PartialDefinitionNode).getPreUpdateNode().asExpr()
575-
)
576-
or
577-
// Reverse flow: data that flows from the post-update node of a reference
578-
// returned by a function call, back into the qualifier of that function.
579-
// This allows data to flow 'in' through references returned by a modeled
580-
// function such as `operator[]`.
581-
exists(DataFlowFunction f, Call call, FunctionInput inModel, FunctionOutput outModel |
582-
call.getTarget() = f and
583-
inModel.isReturnValueDeref() and
584-
outModel.isQualifierObject() and
585-
f.hasDataFlow(inModel, outModel) and
586-
nodeFrom.(PostUpdateNode).getPreUpdateNode().asExpr() = call and
587-
nodeTo.asDefiningArgument() = call.getQualifier()
588-
)
531+
// Local data-flow step from `nodeFrom` to `nodeTo`, with `model` naming the model
// responsible for the step. Every disjunct below is grouped in one parenthesized
// block and paired with `model = ""`, so this predicate only ever yields the empty
// model string.
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo, string model) {
532+
(
533+
// Expr -> Expr
534+
exprToExprStep_nocfg(nodeFrom.asExpr(), nodeTo.asExpr())
535+
or
536+
// Assignment -> LValue post-update node
537+
//
538+
// This is used for assignments whose left-hand side is not a variable
539+
// assignment or a storeStep but is still modeled by other means. It could be
540+
// a call to `operator*` or `operator[]` where taint should flow to the
541+
// post-update node of the qualifier.
542+
exists(AssignExpr assign |
543+
nodeFrom.asExpr() = assign and
544+
nodeTo.(PostUpdateNode).getPreUpdateNode().asExpr() = assign.getLValue()
545+
)
546+
or
547+
// Node -> FlowVar -> VariableAccess
548+
exists(FlowVar var |
549+
(
550+
exprToVarStep(nodeFrom.asExpr(), var)
551+
or
552+
varSourceBaseCase(var, nodeFrom.asParameter())
553+
or
554+
varSourceBaseCase(var, nodeFrom.asUninitialized())
555+
or
556+
var.definedPartiallyAt(nodeFrom.asPartialDefinition())
557+
) and
558+
varToNodeStep(var, nodeTo)
559+
)
560+
or
561+
// Expr -> DefinitionByReferenceNode
562+
exprToDefinitionByReferenceStep(nodeFrom.asExpr(), nodeTo.asDefiningArgument())
563+
or
564+
// `this` -> adjacent-`this`
565+
ThisFlow::adjacentThisRefs(nodeFrom, nodeTo)
566+
or
567+
// post-update-`this` -> following-`this`-ref
568+
ThisFlow::adjacentThisRefs(nodeFrom.(PostUpdateNode).getPreUpdateNode(), nodeTo)
569+
or
570+
// In `f(&x->a)`, this step provides the flow from post-`&` to post-`x->a`,
571+
// from which there is field flow to `x` via reverse read.
572+
exists(PartialDefinition def, Expr inner, Expr outer |
573+
def.definesExpressions(inner, outer) and
574+
inner = nodeTo.(InnerPartialDefinitionNode).getPreUpdateNode().asExpr() and
575+
outer = nodeFrom.(PartialDefinitionNode).getPreUpdateNode().asExpr()
576+
)
577+
or
578+
// Reverse flow: data that flows from the post-update node of a reference
579+
// returned by a function call, back into the qualifier of that function.
580+
// This allows data to flow 'in' through references returned by a modeled
581+
// function such as `operator[]`.
582+
exists(DataFlowFunction f, Call call, FunctionInput inModel, FunctionOutput outModel |
583+
call.getTarget() = f and
584+
inModel.isReturnValueDeref() and
585+
outModel.isQualifierObject() and
586+
f.hasDataFlow(inModel, outModel) and
587+
nodeFrom.(PostUpdateNode).getPreUpdateNode().asExpr() = call and
588+
nodeTo.asDefiningArgument() = call.getQualifier()
589+
)
590+
) and
591+
// The empty string is the only value `model` takes here: no disjunct above binds it.
// NOTE(review): the reverse-flow disjunct consults a `DataFlowFunction` model yet is
// still reported with the empty model string — confirm this is intended.
model = ""
589592
}
590593

591594
/**

cpp/ql/lib/semmle/code/cpp/dataflow/internal/TaintTrackingUtil.qll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ predicate localTaintStep(DataFlow::Node src, DataFlow::Node sink) {
3030
* Holds if the additional step from `src` to `sink` should be included in all
3131
* global taint flow configurations.
3232
*/
33-
predicate defaultAdditionalTaintStep(DataFlow::Node src, DataFlow::Node sink) {
34-
localAdditionalTaintStep(src, sink)
33+
predicate defaultAdditionalTaintStep(DataFlow::Node src, DataFlow::Node sink, string model) {
34+
// `localAdditionalTaintStep` is called with two arguments here and does not bind a
// model, so every step it contributes is paired with the empty model string.
localAdditionalTaintStep(src, sink) and model = ""
3535
}
3636

3737
/**

cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1020,6 +1020,10 @@ predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) { no
10201020
/** Extra data-flow steps needed for lambda flow analysis. */
10211021
predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preservesValue) { none() }
10221022

1023+
/**
 * Stub: the body is `none()`, so this predicate never holds for any `source`/`model` pair.
 *
 * NOTE(review): judging by the name and the commit title ("alert provenance plumbing"),
 * this is meant to relate a flow source to the model that defines it — confirm against
 * the shared data-flow library that consumes it.
 */
predicate knownSourceModel(Node source, string model) { none() }
1024+
1025+
/**
 * Stub: the body is `none()`, so this predicate never holds for any `sink`/`model` pair.
 *
 * NOTE(review): judging by the name and the commit title ("alert provenance plumbing"),
 * this is meant to relate a flow sink to the model that defines it — confirm against
 * the shared data-flow library that consumes it.
 */
predicate knownSinkModel(Node sink, string model) { none() }
1026+
10231027
/**
10241028
* Holds if flow is allowed to pass from parameter `p` and back to itself as a
10251029
* side-effect, resulting in a summary from `p` to itself.

cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll

Lines changed: 37 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1874,7 +1874,7 @@ private module Cached {
18741874
* (intra-procedural) step.
18751875
*/
18761876
cached
1877-
predicate localFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFrom, nodeTo) }
1877+
predicate localFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFrom, nodeTo, _) }
18781878

18791879
private predicate indirectionOperandFlow(RawIndirectOperand nodeFrom, Node nodeTo) {
18801880
nodeFrom != nodeTo and
@@ -1944,41 +1944,45 @@ private module Cached {
19441944
* data flow. It may have less flow than the `localFlowStep` predicate.
19451945
*/
19461946
cached
1947-
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
1948-
// Post update node -> Node flow
1949-
Ssa::postUpdateFlow(nodeFrom, nodeTo)
1950-
or
1951-
// Def-use/Use-use flow
1952-
Ssa::ssaFlow(nodeFrom, nodeTo)
1953-
or
1954-
// Operand -> Instruction flow
1955-
simpleInstructionLocalFlowStep(nodeFrom.asOperand(), nodeTo.asInstruction())
1956-
or
1957-
// Instruction -> Operand flow
1958-
exists(Instruction iFrom, Operand opTo |
1959-
iFrom = nodeFrom.asInstruction() and opTo = nodeTo.asOperand()
1960-
|
1961-
simpleOperandLocalFlowStep(iFrom, opTo) and
1962-
// Omit when the instruction node also represents the operand.
1963-
not iFrom = Ssa::getIRRepresentationOfOperand(opTo)
1964-
)
1965-
or
1966-
// Phi node -> Node flow
1967-
Ssa::fromPhiNode(nodeFrom, nodeTo)
1968-
or
1969-
// Indirect operand -> (indirect) instruction flow
1970-
indirectionOperandFlow(nodeFrom, nodeTo)
1971-
or
1972-
// Indirect instruction -> indirect operand flow
1973-
indirectionInstructionFlow(nodeFrom, nodeTo)
1947+
// Local data-flow step from `nodeFrom` to `nodeTo`, with `model` naming the model
// responsible for the step. `model` is bound in three places: the parenthesized
// group of steps uses "", `modelFlow` supplies its own value, and the reverse-flow
// step uses "".
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo, string model) {
1948+
(
1949+
// Post update node -> Node flow
1950+
Ssa::postUpdateFlow(nodeFrom, nodeTo)
1951+
or
1952+
// Def-use/Use-use flow
1953+
Ssa::ssaFlow(nodeFrom, nodeTo)
1954+
or
1955+
// Operand -> Instruction flow
1956+
simpleInstructionLocalFlowStep(nodeFrom.asOperand(), nodeTo.asInstruction())
1957+
or
1958+
// Instruction -> Operand flow
1959+
exists(Instruction iFrom, Operand opTo |
1960+
iFrom = nodeFrom.asInstruction() and opTo = nodeTo.asOperand()
1961+
|
1962+
simpleOperandLocalFlowStep(iFrom, opTo) and
1963+
// Omit when the instruction node also represents the operand.
1964+
not iFrom = Ssa::getIRRepresentationOfOperand(opTo)
1965+
)
1966+
or
1967+
// Phi node -> Node flow
1968+
Ssa::fromPhiNode(nodeFrom, nodeTo)
1969+
or
1970+
// Indirect operand -> (indirect) instruction flow
1971+
indirectionOperandFlow(nodeFrom, nodeTo)
1972+
or
1973+
// Indirect instruction -> indirect operand flow
1974+
indirectionInstructionFlow(nodeFrom, nodeTo)
1975+
) and
1976+
// None of the steps grouped above binds `model`, so they all get the empty string.
model = ""
19741977
or
19751978
// Flow through modeled functions
1976-
modelFlow(nodeFrom, nodeTo)
1979+
// `modelFlow` binds `model` itself (to "DataFlowFunction" in its definition).
modelFlow(nodeFrom, nodeTo, model)
19771980
or
19781981
// Reverse flow: data that flows from the definition node back into the indirection returned
19791982
// by a function. This allows data to flow 'in' through references returned by a modeled
19801983
// function such as `operator[]`.
1981-
reverseFlow(nodeFrom, nodeTo)
1984+
reverseFlow(nodeFrom, nodeTo) and
1985+
// NOTE(review): the comment above describes this as flow through a modeled function,
// yet the step is reported with the empty model string — confirm this is intended.
model = ""
19821986
}
19831987

19841988
private predicate simpleInstructionLocalFlowStep(Operand opFrom, Instruction iTo) {
@@ -1993,12 +1997,13 @@ private module Cached {
19931997
opTo.getDef() = iFrom
19941998
}
19951999

1996-
private predicate modelFlow(Node nodeFrom, Node nodeTo) {
2000+
private predicate modelFlow(Node nodeFrom, Node nodeTo, string model) {
19972001
exists(
19982002
CallInstruction call, DataFlowFunction func, FunctionInput modelIn, FunctionOutput modelOut
19992003
|
20002004
call.getStaticCallTarget() = func and
2001-
func.hasDataFlow(modelIn, modelOut)
2005+
func.hasDataFlow(modelIn, modelOut) and
2006+
model = "DataFlowFunction"
20022007
|
20032008
nodeFrom = callInput(call, modelIn) and
20042009
nodeTo = callOutput(call, modelOut)

cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/TaintTrackingUtil.qll

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ private import SsaInternals as Ssa
1414
predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
1515
DataFlow::localFlowStep(nodeFrom, nodeTo)
1616
or
17-
localAdditionalTaintStep(nodeFrom, nodeTo)
17+
localAdditionalTaintStep(nodeFrom, nodeTo, _)
1818
}
1919

2020
/**
@@ -23,20 +23,23 @@ predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
2323
* different objects.
2424
*/
2525
cached
26-
predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
27-
operandToInstructionTaintStep(nodeFrom.asOperand(), nodeTo.asInstruction())
26+
// Additional local taint step from `nodeFrom` to `nodeTo`, with `model` naming the
// model responsible for the step. Only the `modeledTaintStep` disjunct supplies a
// value for `model`; the other three disjuncts pair their step with "".
predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo, string model) {
27+
operandToInstructionTaintStep(nodeFrom.asOperand(), nodeTo.asInstruction()) and
28+
model = ""
2829
or
29-
modeledTaintStep(nodeFrom, nodeTo)
30+
// `modeledTaintStep` binds `model` itself.
modeledTaintStep(nodeFrom, nodeTo, model)
3031
or
3132
// Flow from (the indirection of) an operand of a pointer arithmetic instruction to the
3233
// indirection of the pointer arithmetic instruction. This provides flow from `source`
3334
// in `x[source]` to the result of the associated load instruction.
3435
exists(PointerArithmeticInstruction pai, int indirectionIndex |
3536
nodeHasOperand(nodeFrom, pai.getAnOperand(), pragma[only_bind_into](indirectionIndex)) and
3637
hasInstructionAndIndex(nodeTo, pai, indirectionIndex + 1)
37-
)
38+
) and
39+
model = ""
3840
or
39-
any(Ssa::Indirection ind).isAdditionalTaintStep(nodeFrom, nodeTo)
41+
any(Ssa::Indirection ind).isAdditionalTaintStep(nodeFrom, nodeTo) and
42+
model = ""
4043
}
4144

4245
/**
@@ -113,8 +116,8 @@ predicate localExprTaint(Expr e1, Expr e2) {
113116
* Holds if the additional step from `src` to `sink` should be included in all
114117
* global taint flow configurations.
115118
*/
116-
predicate defaultAdditionalTaintStep(DataFlow::Node src, DataFlow::Node sink) {
117-
localAdditionalTaintStep(src, sink)
119+
predicate defaultAdditionalTaintStep(DataFlow::Node src, DataFlow::Node sink, string model) {
120+
// Pure delegation: `model` is forwarded unchanged from `localAdditionalTaintStep`.
localAdditionalTaintStep(src, sink, model)
118121
}
119122

120123
/**
@@ -134,7 +137,7 @@ predicate defaultTaintSanitizer(DataFlow::Node node) { none() }
134137
* Holds if taint can flow from `nodeIn` to `nodeOut` through a call to a
135138
* modeled function.
136139
*/
137-
predicate modeledTaintStep(DataFlow::Node nodeIn, DataFlow::Node nodeOut) {
140+
predicate modeledTaintStep(DataFlow::Node nodeIn, DataFlow::Node nodeOut, string model) {
138141
// Normal taint steps
139142
exists(CallInstruction call, TaintFunction func, FunctionInput modelIn, FunctionOutput modelOut |
140143
call.getStaticCallTarget() = func and
@@ -143,7 +146,8 @@ predicate modeledTaintStep(DataFlow::Node nodeIn, DataFlow::Node nodeOut) {
143146
nodeIn = callInput(call, modelIn) and nodeOut = callOutput(call, modelOut)
144147
or
145148
exists(int d | nodeIn = callInput(call, modelIn, d) and nodeOut = callOutput(call, modelOut, d))
146-
)
149+
) and
150+
model = "TaintFunction"
147151
or
148152
// Taint flow from one argument to another and data flow from an argument to a
149153
// return value. This happens in functions like `strcat` and `memcpy`. We
@@ -160,7 +164,8 @@ predicate modeledTaintStep(DataFlow::Node nodeIn, DataFlow::Node nodeOut) {
160164
func.(TaintFunction).hasTaintFlow(modelIn, modelMidOut) and
161165
func.(DataFlowFunction).hasDataFlow(modelMidIn, modelOut) and
162166
modelMidOut.isParameterDeref(indexMid) and
163-
modelMidIn.isParameter(indexMid)
167+
modelMidIn.isParameter(indexMid) and
168+
model = "TaintFunction"
164169
)
165170
or
166171
// Taint flow from a pointer argument to an output, when the model specifies flow from the deref
@@ -173,9 +178,11 @@ predicate modeledTaintStep(DataFlow::Node nodeIn, DataFlow::Node nodeOut) {
173178
indirectArgument.hasAddressOperandAndIndirectionIndex(nodeIn.asOperand(), _) and
174179
call.getStaticCallTarget() = func and
175180
(
176-
func.(DataFlowFunction).hasDataFlow(modelIn, modelOut)
181+
func.(DataFlowFunction).hasDataFlow(modelIn, modelOut) and
182+
model = "DataFlowFunction"
177183
or
178-
func.(TaintFunction).hasTaintFlow(modelIn, modelOut)
184+
func.(TaintFunction).hasTaintFlow(modelIn, modelOut) and
185+
model = "TaintFunction"
179186
) and
180187
nodeOut = callOutput(call, modelOut)
181188
)

0 commit comments

Comments
 (0)