diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp index ffc7236d1e255..6c7a1601402ef 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "clang/AST/CharUnits.h" +#include "clang/AST/ParentMapContext.h" #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/StaticAnalyzer/Checkers/Taint.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" @@ -34,20 +35,46 @@ using llvm::formatv; namespace { enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint }; -class ArrayBoundCheckerV2 : - public Checker { +struct Messages { + std::string Short, Full; +}; + +// NOTE: The `ArraySubscriptExpr` and `UnaryOperator` callbacks are `PostStmt` +// instead of `PreStmt` because the current implementation passes the whole +// expression to `CheckerContext::getSVal()` which only works after the +// symbolic evaluation of the expression. (To turn them into `PreStmt` +// callbacks, we'd need to duplicate the logic that evaluates these +// expressions.) The `MemberExpr` callback would work as `PreStmt` but it's +// defined as `PostStmt` for the sake of consistency with the other callbacks. +class ArrayBoundCheckerV2 : public Checker, + check::PostStmt, + check::PostStmt> { BugType BT{this, "Out-of-bound access"}; BugType TaintBT{this, "Out-of-bound access", categories::TaintedData}; + void performCheck(const Expr *E, CheckerContext &C) const; + void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, OOB_Kind Kind, - NonLoc Offset, std::string RegName, std::string Msg) const; + NonLoc Offset, Messages Msgs) const; static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC); + static bool isInAddressOf(const Stmt *S, ASTContext &AC); + public: - void checkLocation(SVal l, bool isLoad, const Stmt *S, - CheckerContext &C) const; + void checkPostStmt(const ArraySubscriptExpr *E, CheckerContext &C) const { + performCheck(E, C); + } + void checkPostStmt(const UnaryOperator *E, CheckerContext &C) const { + if (E->getOpcode() == UO_Deref) + performCheck(E, C); + } + void checkPostStmt(const MemberExpr *E, CheckerContext &C) const { + if (E->isArrow()) + performCheck(E->getBase(), C); + } }; + } // anonymous namespace /// For a given Location that can be represented as a symbolic expression @@ -149,9 +176,11 @@ getSimplifiedOffsets(NonLoc offset, nonloc::ConcreteInt extent, // where the first one corresponds to "value below threshold" and the second // corresponds to "value at or above threshold". Returns {nullptr, nullptr} in // the case when the evaluation fails. +// If the optional argument CheckEquality is true, then use BO_EQ instead of +// the default BO_LT after consistently applying the same simplification steps. static std::pair compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold, - SValBuilder &SVB) { + SValBuilder &SVB, bool CheckEquality = false) { if (auto ConcreteThreshold = Threshold.getAs()) { std::tie(Value, Threshold) = getSimplifiedOffsets(Value, *ConcreteThreshold, SVB); } @@ -167,8 +196,10 @@ compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold, return {nullptr, State}; } } + const BinaryOperatorKind OpKind = CheckEquality ? BO_EQ : BO_LT; auto BelowThreshold = - SVB.evalBinOpNN(State, BO_LT, Value, Threshold, SVB.getConditionType()).getAs(); + SVB.evalBinOpNN(State, OpKind, Value, Threshold, SVB.getConditionType()) + .getAs(); if (BelowThreshold) return State->assume(*BelowThreshold); @@ -217,16 +248,19 @@ static std::string getShortMsg(OOB_Kind Kind, std::string RegName) { return formatv(ShortMsgTemplates[Kind], RegName); } -static std::string getPrecedesMsg(std::string RegName, NonLoc Offset) { +static Messages getPrecedesMsgs(const SubRegion *Region, NonLoc Offset) { + std::string RegName = getRegionName(Region); SmallString<128> Buf; llvm::raw_svector_ostream Out(Buf); Out << "Access of " << RegName << " at negative byte offset"; if (auto ConcreteIdx = Offset.getAs()) Out << ' ' << ConcreteIdx->getValue(); - return std::string(Buf); + return {getShortMsg(OOB_Precedes, RegName), std::string(Buf)}; } -static std::string getExceedsMsg(ASTContext &ACtx, std::string RegName, - NonLoc Offset, NonLoc Extent, SVal Location) { + +static Messages getExceedsMsgs(ASTContext &ACtx, const SubRegion *Region, + NonLoc Offset, NonLoc Extent, SVal Location) { + std::string RegName = getRegionName(Region); const auto *EReg = Location.getAsRegion()->getAs(); assert(EReg && "this checker only handles element access"); QualType ElemType = EReg->getElementType(); @@ -273,20 +307,18 @@ static std::string getExceedsMsg(ASTContext &ACtx, std::string RegName, Out << "s"; } - return std::string(Buf); -} -static std::string getTaintMsg(std::string RegName) { - SmallString<128> Buf; - llvm::raw_svector_ostream Out(Buf); - Out << "Access of " << RegName - << " with a tainted offset that may be too large"; - return std::string(Buf); + return {getShortMsg(OOB_Exceeds, RegName), std::string(Buf)}; } -void ArrayBoundCheckerV2::checkLocation(SVal Location, bool IsLoad, - const Stmt *LoadS, - CheckerContext &C) const { +static Messages getTaintMsgs(const SubRegion *Region, const char *OffsetName) { + std::string RegName = getRegionName(Region); + return {formatv("Potential out of bound access to {0} with tainted {1}", + RegName, OffsetName), + formatv("Access of {0} with a tainted {1} that may be too large", + RegName, OffsetName)}; +} +void ArrayBoundCheckerV2::performCheck(const Expr *E, CheckerContext &C) const { // NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping // some new logic here that reasons directly about memory region extents. // Once that logic is more mature, we can bring it back to assumeInBound() @@ -297,12 +329,14 @@ void ArrayBoundCheckerV2::checkLocation(SVal Location, bool IsLoad, // have some flexibility in defining the base region, we can achieve // various levels of conservatism in our buffer overflow checking. + const SVal Location = C.getSVal(E); + // The header ctype.h (from e.g. glibc) implements the isXXXXX() macros as // #define isXXXXX(arg) (LOOKUP_TABLE[arg] & BITMASK_FOR_XXXXX) // and incomplete analysis of these leads to false positives. As even // accurate reports would be confusing for the users, just disable reports // from these macros: - if (isFromCtypeMacro(LoadS, C.getASTContext())) + if (isFromCtypeMacro(E, C.getASTContext())) return; ProgramStateRef State = C.getState(); @@ -331,9 +365,8 @@ void ArrayBoundCheckerV2::checkLocation(SVal Location, bool IsLoad, if (PrecedesLowerBound && !WithinLowerBound) { // We know that the index definitely precedes the lower bound. - std::string RegName = getRegionName(Reg); - std::string Msg = getPrecedesMsg(RegName, ByteOffset); - reportOOB(C, PrecedesLowerBound, OOB_Precedes, ByteOffset, RegName, Msg); + Messages Msgs = getPrecedesMsgs(Reg, ByteOffset); + reportOOB(C, PrecedesLowerBound, OOB_Precedes, ByteOffset, Msgs); return; } @@ -350,17 +383,38 @@ void ArrayBoundCheckerV2::checkLocation(SVal Location, bool IsLoad, if (ExceedsUpperBound) { if (!WithinUpperBound) { // We know that the index definitely exceeds the upper bound. - std::string RegName = getRegionName(Reg); - std::string Msg = getExceedsMsg(C.getASTContext(), RegName, ByteOffset, - *KnownSize, Location); - reportOOB(C, ExceedsUpperBound, OOB_Exceeds, ByteOffset, RegName, Msg); + if (isa(E) && isInAddressOf(E, C.getASTContext())) { + // ...but this is within an addressof expression, so we need to check + // for the exceptional case that `&array[size]` is valid. + auto [EqualsToThreshold, NotEqualToThreshold] = + compareValueToThreshold(ExceedsUpperBound, ByteOffset, *KnownSize, + SVB, /*CheckEquality=*/true); + if (EqualsToThreshold && !NotEqualToThreshold) { + // We are definitely in the exceptional case, so return early + // instead of reporting a bug. + C.addTransition(EqualsToThreshold); + return; + } + } + Messages Msgs = getExceedsMsgs(C.getASTContext(), Reg, ByteOffset, + *KnownSize, Location); + reportOOB(C, ExceedsUpperBound, OOB_Exceeds, ByteOffset, Msgs); return; } if (isTainted(State, ByteOffset)) { - // Both cases are possible, but the index is tainted, so report. + // Both cases are possible, but the offset is tainted, so report. std::string RegName = getRegionName(Reg); - std::string Msg = getTaintMsg(RegName); - reportOOB(C, ExceedsUpperBound, OOB_Taint, ByteOffset, RegName, Msg); + + // Diagnostic detail: "tainted offset" is always correct, but the + // common case is that 'idx' is tainted in 'arr[idx]' and then it's + // nicer to say "tainted index". + const char *OffsetName = "offset"; + if (const auto *ASE = dyn_cast(E)) + if (isTainted(State, ASE->getIdx(), C.getLocationContext())) + OffsetName = "index"; + + Messages Msgs = getTaintMsgs(Reg, OffsetName); + reportOOB(C, ExceedsUpperBound, OOB_Taint, ByteOffset, Msgs); return; } } @@ -374,17 +428,14 @@ void ArrayBoundCheckerV2::checkLocation(SVal Location, bool IsLoad, void ArrayBoundCheckerV2::reportOOB(CheckerContext &C, ProgramStateRef ErrorState, OOB_Kind Kind, - NonLoc Offset, std::string RegName, - std::string Msg) const { + NonLoc Offset, Messages Msgs) const { ExplodedNode *ErrorNode = C.generateErrorNode(ErrorState); if (!ErrorNode) return; - std::string ShortMsg = getShortMsg(Kind, RegName); - auto BR = std::make_unique( - Kind == OOB_Taint ? TaintBT : BT, ShortMsg, Msg, ErrorNode); + Kind == OOB_Taint ? TaintBT : BT, Msgs.Short, Msgs.Full, ErrorNode); // Track back the propagation of taintedness. if (Kind == OOB_Taint) @@ -413,6 +464,18 @@ bool ArrayBoundCheckerV2::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) { (MacroName == "isupper") || (MacroName == "isxdigit")); } +bool ArrayBoundCheckerV2::isInAddressOf(const Stmt *S, ASTContext &ACtx) { + ParentMapContext &ParentCtx = ACtx.getParentMapContext(); + do { + const DynTypedNodeList Parents = ParentCtx.getParents(*S); + if (Parents.empty()) + return false; + S = Parents[0].get(); + } while (isa_and_nonnull(S)); + const auto *UnaryOp = dyn_cast_or_null(S); + return UnaryOp && UnaryOp->getOpcode() == UO_AddrOf; +} + void ento::registerArrayBoundCheckerV2(CheckerManager &mgr) { mgr.registerChecker(); } diff --git a/clang/test/Analysis/out-of-bounds-diagnostics.c b/clang/test/Analysis/out-of-bounds-diagnostics.c index 4f5d0d0bc91c5..a6bb04750bb81 100644 --- a/clang/test/Analysis/out-of-bounds-diagnostics.c +++ b/clang/test/Analysis/out-of-bounds-diagnostics.c @@ -9,6 +9,14 @@ void arrayUnderflow(void) { // expected-note@-2 {{Access of 'array' at negative byte offset -12}} } +int underflowWithDeref(void) { + int *p = array; + --p; + return *p; + // expected-warning@-1 {{Out of bound access to memory preceding 'array'}} + // expected-note@-2 {{Access of 'array' at negative byte offset -4}} +} + int scanf(const char *restrict fmt, ...); void taintedIndex(void) { @@ -17,6 +25,38 @@ void taintedIndex(void) { // expected-note@-1 {{Taint originated here}} // expected-note@-2 {{Taint propagated to the 2nd argument}} array[index] = 5; + // expected-warning@-1 {{Potential out of bound access to 'array' with tainted index}} + // expected-note@-2 {{Access of 'array' with a tainted index that may be too large}} +} + +int *taintedIndexAfterTheEndPtr(void) { + // NOTE: Technically speaking, this testcase does not trigger any UB because + // &array[10] is the after-the-end pointer which is well-defined; but this is + // a bug-prone situation and far from the idiomatic use of `&array[size]`, so + // it's better to report an error. This report can be easily silenced by + // writing array+index instead of &array[index]. + int index; + scanf("%d", &index); + // expected-note@-1 {{Taint originated here}} + // expected-note@-2 {{Taint propagated to the 2nd argument}} + if (index < 0 || index > 10) + return array; + // expected-note@-2 {{Assuming 'index' is >= 0}} + // expected-note@-3 {{Left side of '||' is false}} + // expected-note@-4 {{Assuming 'index' is <= 10}} + // expected-note@-5 {{Taking false branch}} + return &array[index]; + // expected-warning@-1 {{Potential out of bound access to 'array' with tainted index}} + // expected-note@-2 {{Access of 'array' with a tainted index that may be too large}} +} + +void taintedOffset(void) { + int index; + scanf("%d", &index); + // expected-note@-1 {{Taint originated here}} + // expected-note@-2 {{Taint propagated to the 2nd argument}} + int *p = array + index; + p[0] = 5; // expected-warning@-1 {{Potential out of bound access to 'array' with tainted offset}} // expected-note@-2 {{Access of 'array' with a tainted offset that may be too large}} } @@ -27,6 +67,47 @@ void arrayOverflow(void) { // expected-note@-2 {{Access of 'array' at index 12, while it holds only 10 'int' elements}} } +void flippedOverflow(void) { + 12[array] = 5; + // expected-warning@-1 {{Out of bound access to memory after the end of 'array'}} + // expected-note@-2 {{Access of 'array' at index 12, while it holds only 10 'int' elements}} +} + +int *afterTheEndPtr(void) { + // This is an unusual but standard-compliant way of writing (array + 10). + return &array[10]; // no-warning +} + +int useAfterTheEndPtr(void) { + // ... but dereferencing the after-the-end pointer is still invalid. + return *afterTheEndPtr(); + // expected-warning@-1 {{Out of bound access to memory after the end of 'array'}} + // expected-note@-2 {{Access of 'array' at index 10, while it holds only 10 'int' elements}} +} + +int *afterAfterTheEndPtr(void) { + // This is UB, it's invalid to form an after-after-the-end pointer. + return &array[11]; + // expected-warning@-1 {{Out of bound access to memory after the end of 'array'}} + // expected-note@-2 {{Access of 'array' at index 11, while it holds only 10 'int' elements}} +} + +int *potentialAfterTheEndPtr(int idx) { + if (idx < 10) { /* ...do something... */ } + // expected-note@-1 {{Assuming 'idx' is >= 10}} + // expected-note@-2 {{Taking false branch}} + return &array[idx]; + // expected-warning@-1 {{Out of bound access to memory after the end of 'array'}} + // expected-note@-2 {{Access of 'array' at an overflowing index, while it holds only 10 'int' elements}} + // NOTE: On the idx >= 10 branch the normal "optimistic" behavior would've + // been continuing with the assumption that idx == 10 and the return value is + // a legitimate after-the-end pointer. The checker deviates from this by + // reporting an error because this situation is very suspicious and far from + // the idiomatic `&array[size]` expressions. If the report is FP, the + // developer can easily silence it by writing array+idx instead of + // &array[idx]. +} + int scalar; int scalarOverflow(void) { return (&scalar)[1]; @@ -41,12 +122,6 @@ int oneElementArrayOverflow(void) { // expected-note@-2 {{Access of 'oneElementArray' at index 1, while it holds only a single 'int' element}} } -short convertedArray(void) { - return ((short*)array)[47]; - // expected-warning@-1 {{Out of bound access to memory after the end of 'array'}} - // expected-note@-2 {{Access of 'array' at index 47, while it holds only 20 'short' elements}} -} - struct vec { int len; double elems[64]; @@ -64,6 +139,28 @@ double arrayInStructPtr(struct vec *pv) { // expected-note@-2 {{Access of the field 'elems' at index 64, while it holds only 64 'double' elements}} } +struct item { + int a, b; +} itemArray[20] = {0}; + +int arrayOfStructs(void) { + return itemArray[35].a; + // expected-warning@-1 {{Out of bound access to memory after the end of 'itemArray'}} + // expected-note@-2 {{Access of 'itemArray' at index 35, while it holds only 20 'struct item' elements}} +} + +int arrayOfStructsArrow(void) { + return (itemArray + 35)->b; + // expected-warning@-1 {{Out of bound access to memory after the end of 'itemArray'}} + // expected-note@-2 {{Access of 'itemArray' at index 35, while it holds only 20 'struct item' elements}} +} + +short convertedArray(void) { + return ((short*)array)[47]; + // expected-warning@-1 {{Out of bound access to memory after the end of 'array'}} + // expected-note@-2 {{Access of 'array' at index 47, while it holds only 20 'short' elements}} +} + struct two_bytes { char lo, hi; }; @@ -85,7 +182,9 @@ int intFromString(void) { } int intFromStringDivisible(void) { - // However, this is reported with indices/elements, because the extent happens to be a multiple of 4. + // However, this is reported with indices/elements, because the extent + // (of the string that consists of 'a', 'b', 'c' and '\0') happens to be a + // multiple of 4 bytes (= sizeof(int)). return ((const int*)"abc")[20]; // expected-warning@-1 {{Out of bound access to memory after the end of the string literal}} // expected-note@-2 {{Access of the string literal at index 20, while it holds only a single 'int' element}} diff --git a/clang/test/Analysis/out-of-bounds-new.cpp b/clang/test/Analysis/out-of-bounds-new.cpp index af4ec47d8358a..f541bdf810d79 100644 --- a/clang/test/Analysis/out-of-bounds-new.cpp +++ b/clang/test/Analysis/out-of-bounds-new.cpp @@ -154,3 +154,29 @@ void test_dynamic_size2(unsigned m,unsigned n){ unsigned *U = nullptr; U = new unsigned[m + n + 1]; } + +//Test creating invalid references, which break the invariant that a reference +//is always holding a value, and could lead to nasty runtime errors. +//(This is not related to operator new, but placed in this file because the +//other test files are not C++.) +int array[10] = {0}; + +void test_after_the_end_reference() { + int &ref = array[10]; // expected-warning{{Out of bound access to memory}} +} + +void test_after_after_the_end_reference() { + int &ref = array[11]; // expected-warning{{Out of bound access to memory}} +} + +int test_reference_that_might_be_after_the_end(int idx) { + // This TC produces no warning because separate analysis of (idx == 10) is + // only introduced _after_ the creation of the reference ref. + if (idx < 0 || idx > 10) + return -2; + int &ref = array[idx]; + if (idx == 10) + return -1; + return ref; +} + diff --git a/clang/test/Analysis/taint-diagnostic-visitor.c b/clang/test/Analysis/taint-diagnostic-visitor.c index 67dc67e627b3b..a3fa1639bffee 100644 --- a/clang/test/Analysis/taint-diagnostic-visitor.c +++ b/clang/test/Analysis/taint-diagnostic-visitor.c @@ -29,8 +29,8 @@ int taintDiagnosticOutOfBound(void) { int Array[] = {1, 2, 3, 4, 5}; scanf("%d", &index); // expected-note {{Taint originated here}} // expected-note@-1 {{Taint propagated to the 2nd argument}} - return Array[index]; // expected-warning {{Potential out of bound access to 'Array' with tainted offset}} - // expected-note@-1 {{Access of 'Array' with a tainted offset that may be too large}} + return Array[index]; // expected-warning {{Potential out of bound access to 'Array' with tainted index}} + // expected-note@-1 {{Access of 'Array' with a tainted index that may be too large}} } int taintDiagnosticDivZero(int operand) {