diff --git a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def index 29aa6a3b8a16e..737bc8e86cfb6 100644 --- a/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def +++ b/clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def @@ -407,6 +407,11 @@ ANALYZER_OPTION( ANALYZER_OPTION(unsigned, MaxSymbolComplexity, "max-symbol-complexity", "The maximum complexity of symbolic constraint.", 35) +// HACK:https://discourse.llvm.org/t/rfc-make-istainted-and-complex-symbols-friends/79570 +// Ideally, we should get rid of this option soon. +ANALYZER_OPTION(unsigned, MaxTaintedSymbolComplexity, "max-tainted-symbol-complexity", + "[DEPRECATED] The maximum complexity of a symbol to carry taint", 9) + ANALYZER_OPTION(unsigned, MaxTimesInlineLarge, "max-times-inline-large", "The maximum times a large function could be inlined.", 32) diff --git a/clang/lib/StaticAnalyzer/Checkers/Taint.cpp b/clang/lib/StaticAnalyzer/Checkers/Taint.cpp index 6362c82b009d7..0bb5739db4b75 100644 --- a/clang/lib/StaticAnalyzer/Checkers/Taint.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/Taint.cpp @@ -12,6 +12,7 @@ #include "clang/StaticAnalyzer/Checkers/Taint.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" #include @@ -256,6 +257,12 @@ std::vector taint::getTaintedSymbolsImpl(ProgramStateRef State, if (!Sym) return TaintedSymbols; + // HACK:https://discourse.llvm.org/t/rfc-make-istainted-and-complex-symbols-friends/79570 + if (const auto &Opts = State->getAnalysisManager().getAnalyzerOptions(); + Sym->computeComplexity() > Opts.MaxTaintedSymbolComplexity) { + return {}; + } + // Traverse all the symbols this symbol depends on to see if any are tainted. for (SymbolRef SubSym : Sym->symbols()) { if (!isa(SubSym)) diff --git a/clang/test/Analysis/analyzer-config.c b/clang/test/Analysis/analyzer-config.c index b8dbcdd7e55af..8eb869bac46f8 100644 --- a/clang/test/Analysis/analyzer-config.c +++ b/clang/test/Analysis/analyzer-config.c @@ -94,6 +94,7 @@ // CHECK-NEXT: max-inlinable-size = 100 // CHECK-NEXT: max-nodes = 225000 // CHECK-NEXT: max-symbol-complexity = 35 +// CHECK-NEXT: max-tainted-symbol-complexity = 9 // CHECK-NEXT: max-times-inline-large = 32 // CHECK-NEXT: min-cfg-size-treat-functions-as-large = 14 // CHECK-NEXT: mode = deep diff --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c index b0df85f237298..1c139312734bc 100644 --- a/clang/test/Analysis/taint-generic.c +++ b/clang/test/Analysis/taint-generic.c @@ -63,6 +63,7 @@ void clang_analyzer_isTainted_char(char); void clang_analyzer_isTainted_wchar(wchar_t); void clang_analyzer_isTainted_charp(char*); void clang_analyzer_isTainted_int(int); +void clang_analyzer_dump_int(int); int coin(); @@ -459,7 +460,53 @@ unsigned radar11369570_hanging(const unsigned char *arr, int l) { longcmp(a, t, c); l -= 12; } - return 5/a; // expected-warning {{Division by a tainted value, possibly zero}} + return 5/a; // FIXME: Should be a "div by tainted" warning here. +} + +// This computation used to take a very long time. +void complex_taint_queries(const int *p) { + int tainted = 0; + scanf("%d", &tainted); + + // Make "tmp" tainted. + int tmp = tainted + tainted; + clang_analyzer_isTainted_int(tmp); // expected-warning{{YES}} + + // Make "tmp" SymExpr a lot more complicated by applying computation. + // This should balloon the symbol complexity. + tmp += p[0] + p[0]; + tmp += p[1] + p[1]; + tmp += p[2] + p[2]; + clang_analyzer_dump_int(tmp); // expected-warning{{((((conj_}} symbol complexity: 8 + clang_analyzer_isTainted_int(tmp); // expected-warning{{YES}} + + tmp += p[3] + p[3]; + clang_analyzer_dump_int(tmp); // expected-warning{{(((((conj_}} symbol complexity: 10 + clang_analyzer_isTainted_int(tmp); // expected-warning{{NO}} 10 is already too complex to be traversed + + tmp += p[4] + p[4]; + tmp += p[5] + p[5]; + tmp += p[6] + p[6]; + tmp += p[7] + p[7]; + tmp += p[8] + p[8]; + tmp += p[9] + p[9]; + tmp += p[10] + p[10]; + tmp += p[11] + p[11]; + tmp += p[12] + p[12]; + tmp += p[13] + p[13]; + tmp += p[14] + p[14]; + tmp += p[15] + p[15]; + + // The SymExpr still holds the full history of the computation, yet, "isTainted" doesn't traverse the tree as the complexity is over the threshold. + clang_analyzer_dump_int(tmp); + // expected-warning@-1{{(((((((((((((((((conj_}} symbol complexity: 34 + clang_analyzer_isTainted_int(tmp); // expected-warning{{NO}} FIXME: Ideally, this should still result in "tainted". + + // By making it even one step more complex, then it would hit the "max-symbol-complexity" + // threshold and the engine would cut the SymExpr and replace it by a new conjured symbol. + tmp += p[16]; + clang_analyzer_dump_int(tmp); // expected-warning{{conj_}} symbol complexity: 1 + clang_analyzer_isTainted_int(tmp); // expected-warning{{NO}} } // Check that we do not assert of the following code.