Skip to content

Commit 02b51e5

Browse files
[analyzer][solver] Redesign constraint ranges data structure
ImmutableSet doesn't seem like the perfect fit for the RangeSet data structure. It is good for saving memory in a persistent setting, but not for the case when the population of the container is tiny. This commit replaces the RangeSet implementation and redesigns the most common operations to be more efficient. Differential Revision: https://reviews.llvm.org/D86465
1 parent 9cdbdbe commit 02b51e5

File tree

4 files changed

+857
-352
lines changed

4 files changed

+857
-352
lines changed

clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h

Lines changed: 243 additions & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,8 @@
1616
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
1717
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
1818
#include "clang/StaticAnalyzer/Core/PathSensitive/SimpleConstraintManager.h"
19+
#include "llvm/ADT/APSInt.h"
20+
#include "llvm/Support/Allocator.h"
1921

2022
namespace clang {
2123

@@ -24,21 +26,19 @@ namespace ento {
2426
/// A Range represents the closed range [from, to]. The caller must
2527
/// guarantee that from <= to. Note that Range is immutable, so as not
2628
/// to subvert RangeSet's immutability.
27-
class Range : public std::pair<const llvm::APSInt *, const llvm::APSInt *> {
29+
class Range {
2830
public:
29-
Range(const llvm::APSInt &from, const llvm::APSInt &to)
30-
: std::pair<const llvm::APSInt *, const llvm::APSInt *>(&from, &to) {
31-
assert(from <= to);
31+
Range(const llvm::APSInt &From, const llvm::APSInt &To) : Impl(&From, &To) {
32+
assert(From <= To);
3233
}
3334

34-
Range(const llvm::APSInt &point)
35-
: std::pair<const llvm::APSInt *, const llvm::APSInt *>(&point, &point) {}
35+
Range(const llvm::APSInt &Point) : Range(Point, Point) {}
3636

37-
bool Includes(const llvm::APSInt &v) const {
38-
return *first <= v && v <= *second;
37+
bool Includes(const llvm::APSInt &Point) const {
38+
return From() <= Point && Point <= To();
3939
}
40-
const llvm::APSInt &From() const { return *first; }
41-
const llvm::APSInt &To() const { return *second; }
40+
const llvm::APSInt &From() const { return *Impl.first; }
41+
const llvm::APSInt &To() const { return *Impl.second; }
4242
const llvm::APSInt *getConcreteValue() const {
4343
return &From() == &To() ? &From() : nullptr;
4444
}
@@ -47,93 +47,264 @@ class Range : public std::pair<const llvm::APSInt *, const llvm::APSInt *> {
4747
ID.AddPointer(&From());
4848
ID.AddPointer(&To());
4949
}
50-
};
50+
void dump(raw_ostream &OS) const;
5151

52-
class RangeTrait : public llvm::ImutContainerInfo<Range> {
53-
public:
54-
// When comparing if one Range is less than another, we should compare
55-
// the actual APSInt values instead of their pointers. This keeps the order
56-
// consistent (instead of comparing by pointer values) and can potentially
57-
// be used to speed up some of the operations in RangeSet.
58-
static inline bool isLess(key_type_ref lhs, key_type_ref rhs) {
59-
return *lhs.first < *rhs.first ||
60-
(!(*rhs.first < *lhs.first) && *lhs.second < *rhs.second);
61-
}
52+
// In order to keep non-overlapping ranges sorted, we can compare only From
53+
// points.
54+
bool operator<(const Range &RHS) const { return From() < RHS.From(); }
55+
56+
bool operator==(const Range &RHS) const { return Impl == RHS.Impl; }
57+
bool operator!=(const Range &RHS) const { return !operator==(RHS); }
58+
59+
private:
60+
std::pair<const llvm::APSInt *, const llvm::APSInt *> Impl;
6261
};
6362

64-
/// RangeSet contains a set of ranges. If the set is empty, then
65-
/// there the value of a symbol is overly constrained and there are no
66-
/// possible values for that symbol.
63+
/// RangeSet is a persistent set of non-overlapping ranges.
64+
///
65+
/// New RangeSet objects can ONLY be produced by a RangeSet::Factory object, which
66+
/// also supports the most common operations performed on range sets.
67+
///
68+
/// An empty set corresponds to an overly constrained symbol, meaning that there
69+
/// are no possible values for that symbol.
6770
class RangeSet {
68-
typedef llvm::ImmutableSet<Range, RangeTrait> PrimRangeSet;
69-
PrimRangeSet ranges; // no need to make const, since it is an
70-
// ImmutableSet - this allows default operator=
71-
// to work.
7271
public:
73-
typedef PrimRangeSet::Factory Factory;
74-
typedef PrimRangeSet::iterator iterator;
75-
76-
RangeSet(PrimRangeSet RS) : ranges(RS) {}
77-
78-
/// Create a new set with all ranges of this set and RS.
79-
/// Possible intersections are not checked here.
80-
RangeSet addRange(Factory &F, const RangeSet &RS) {
81-
PrimRangeSet Ranges(RS.ranges);
82-
for (const auto &range : ranges)
83-
Ranges = F.add(Ranges, range);
84-
return RangeSet(Ranges);
85-
}
86-
87-
iterator begin() const { return ranges.begin(); }
88-
iterator end() const { return ranges.end(); }
72+
class Factory;
8973

90-
bool isEmpty() const { return ranges.isEmpty(); }
74+
private:
75+
// We use llvm::SmallVector as the underlying container for the following
76+
// reasons:
77+
//
78+
// * Range sets are usually very simple, 1 or 2 ranges.
79+
// That's why llvm::ImmutableSet is not perfect.
80+
//
81+
// * Ranges in sets are NOT overlapping, so it is natural to keep them
82+
// sorted for efficient operations and queries. For this reason,
83+
// llvm::SmallSet doesn't fit the requirements, it is not sorted when it
84+
// is a vector.
85+
//
86+
// * Range set operations are usually a bit harder than adding/removing a range.
87+
// Complex operations might do many of those for just one range set.
88+
// Formerly it used to be llvm::ImmutableSet, which is inefficient for our
89+
// purposes as we want to make these operations BOTH immutable AND
90+
// efficient.
91+
//
92+
// * Iteration over ranges is widespread and a more cache-friendly
93+
// structure is preferred.
94+
using ImplType = llvm::SmallVector<Range, 4>;
95+
96+
struct ContainerType : public ImplType, public llvm::FoldingSetNode {
97+
void Profile(llvm::FoldingSetNodeID &ID) const {
98+
for (const Range &It : *this) {
99+
It.Profile(ID);
100+
}
101+
}
102+
};
103+
// This is a non-owning pointer to an actual container.
104+
// The memory is fully managed by the factory and is alive as long as the
105+
// factory itself is alive.
106+
// It is a pointer as opposed to a reference, so we can easily reassign
107+
// RangeSet objects.
108+
using UnderlyingType = const ContainerType *;
109+
UnderlyingType Impl;
91110

92-
/// Construct a new RangeSet representing '{ [from, to] }'.
93-
RangeSet(Factory &F, const llvm::APSInt &from, const llvm::APSInt &to)
94-
: ranges(F.add(F.getEmptySet(), Range(from, to))) {}
111+
public:
112+
using const_iterator = ImplType::const_iterator;
113+
114+
const_iterator begin() const { return Impl->begin(); }
115+
const_iterator end() const { return Impl->end(); }
116+
size_t size() const { return Impl->size(); }
117+
118+
bool isEmpty() const { return Impl->empty(); }
119+
120+
class Factory {
121+
public:
122+
Factory(BasicValueFactory &BV) : ValueFactory(BV) {}
123+
124+
/// Create a new set with all ranges from both LHS and RHS.
125+
/// Possible intersections are not checked here.
126+
///
127+
/// Complexity: O(N + M)
128+
/// where N = size(LHS), M = size(RHS)
129+
RangeSet add(RangeSet LHS, RangeSet RHS);
130+
/// Create a new set with all ranges from the original set plus the new one.
131+
/// Possible intersections are not checked here.
132+
///
133+
/// Complexity: O(N)
134+
/// where N = size(Original)
135+
RangeSet add(RangeSet Original, Range Element);
136+
/// Create a new set with all ranges from the original set plus the point.
137+
/// Possible intersections are not checked here.
138+
///
139+
/// Complexity: O(N)
140+
/// where N = size(Original)
141+
RangeSet add(RangeSet Original, const llvm::APSInt &Point);
142+
143+
RangeSet getEmptySet() { return &EmptySet; }
144+
145+
/// Create a new set with just one range.
146+
/// @{
147+
RangeSet getRangeSet(Range Origin);
148+
RangeSet getRangeSet(const llvm::APSInt &From, const llvm::APSInt &To) {
149+
return getRangeSet(Range(From, To));
150+
}
151+
RangeSet getRangeSet(const llvm::APSInt &Origin) {
152+
return getRangeSet(Origin, Origin);
153+
}
154+
/// @}
155+
156+
/// Intersect the given range sets.
157+
///
158+
/// Complexity: O(N + M)
159+
/// where N = size(LHS), M = size(RHS)
160+
RangeSet intersect(RangeSet LHS, RangeSet RHS);
161+
/// Intersect the given set with the closed range [Lower, Upper].
162+
///
163+
/// Unlike the Range type, this range uses modular arithmetic, corresponding
164+
/// to the common treatment of C integer overflow. Thus, if the Lower bound
165+
/// is greater than the Upper bound, the range is taken to wrap around. This
166+
/// is equivalent to taking the intersection with the two ranges [Min,
167+
/// Upper] and [Lower, Max], or, alternatively, /removing/ all integers
168+
/// between Upper and Lower.
169+
///
170+
/// Complexity: O(N)
171+
/// where N = size(What)
172+
RangeSet intersect(RangeSet What, llvm::APSInt Lower, llvm::APSInt Upper);
173+
/// Intersect the given range set with the given point.
174+
///
175+
/// The result can be either an empty set or a set containing the given
176+
/// point depending on whether the point is in the range set.
177+
///
178+
/// Complexity: O(logN)
179+
/// where N = size(What)
180+
RangeSet intersect(RangeSet What, llvm::APSInt Point);
181+
182+
/// Delete the given point from the range set.
183+
///
184+
/// Complexity: O(N)
185+
/// where N = size(From)
186+
RangeSet deletePoint(RangeSet From, const llvm::APSInt &Point);
187+
/// Negate the given range set.
188+
///
189+
/// Turn all [A, B] ranges to [-B, -A], where "-" is a C-like unary minus
190+
/// operation under the values of the type.
191+
///
192+
/// We also handle MIN because applying unary minus to MIN does not change
193+
/// it.
194+
/// Example 1:
195+
/// char x = -128; // -128 is a MIN value in a range of 'char'
196+
/// char y = -x; // y: -128
197+
///
198+
/// Example 2:
199+
/// unsigned char x = 0; // 0 is a MIN value in a range of 'unsigned char'
200+
/// unsigned char y = -x; // y: 0
201+
///
202+
/// This forces us to split a range
203+
/// like [MIN, N] to [MIN, MIN] U [-N, MAX].
204+
/// For instance, whole range is {-128..127} and subrange is [-128,-126],
205+
/// thus [-128,-127,-126,...] negates to [-128,...,126,127].
206+
///
207+
/// Negate restores disrupted ranges on bounds,
208+
/// e.g. [MIN, B] => [MIN, MIN] U [-B, MAX] => [MIN, B].
209+
///
210+
/// Negate is a self-inverse function, i.e. negate(negate(R)) == R.
211+
///
212+
/// Complexity: O(N)
213+
/// where N = size(What)
214+
RangeSet negate(RangeSet What);
215+
216+
private:
217+
/// Return a persistent version of the given container.
218+
RangeSet makePersistent(ContainerType &&From);
219+
/// Construct a new persistent version of the given container.
220+
ContainerType *construct(ContainerType &&From);
221+
222+
RangeSet intersect(const ContainerType &LHS, const ContainerType &RHS);
223+
224+
// Many operations include producing new APSInt values and that's why
225+
// we need this factory.
226+
BasicValueFactory &ValueFactory;
227+
// Allocator for all the created containers.
228+
// Containers might own their own memory and that's why it is specific
229+
// for the type, so it calls container destructors upon deletion.
230+
llvm::SpecificBumpPtrAllocator<ContainerType> Arena;
231+
// Usually we deal with the same ranges and range sets over and over.
232+
// Here we track all created containers and try not to repeat ourselves.
233+
llvm::FoldingSet<ContainerType> Cache;
234+
static ContainerType EmptySet;
235+
};
236+
237+
RangeSet(const RangeSet &) = default;
238+
RangeSet &operator=(const RangeSet &) = default;
239+
RangeSet(RangeSet &&) = default;
240+
RangeSet &operator=(RangeSet &&) = default;
241+
~RangeSet() = default;
242+
243+
/// Construct a new RangeSet representing '{ [From, To] }'.
244+
RangeSet(Factory &F, const llvm::APSInt &From, const llvm::APSInt &To)
245+
: RangeSet(F.getRangeSet(From, To)) {}
95246

96247
/// Construct a new RangeSet representing the given point as a range.
97-
RangeSet(Factory &F, const llvm::APSInt &point) : RangeSet(F, point, point) {}
248+
RangeSet(Factory &F, const llvm::APSInt &Point)
249+
: RangeSet(F.getRangeSet(Point)) {}
250+
251+
static void Profile(llvm::FoldingSetNodeID &ID, const RangeSet &RS) {
252+
ID.AddPointer(RS.Impl);
253+
}
98254

99255
/// Profile - Generates a hash profile of this RangeSet for use
100256
/// by FoldingSet.
101-
void Profile(llvm::FoldingSetNodeID &ID) const { ranges.Profile(ID); }
257+
void Profile(llvm::FoldingSetNodeID &ID) const { Profile(ID, *this); }
102258

103259
/// getConcreteValue - If a symbol is constrained to equal a specific integer
104260
/// constant then this method returns that value. Otherwise, it returns
105261
/// NULL.
106262
const llvm::APSInt *getConcreteValue() const {
107-
return ranges.isSingleton() ? ranges.begin()->getConcreteValue() : nullptr;
263+
return Impl->size() == 1 ? begin()->getConcreteValue() : nullptr;
108264
}
109265

110-
/// Get a minimal value covered by the ranges in the set
266+
/// Get the minimal value covered by the ranges in the set.
267+
///
268+
/// Complexity: O(1)
111269
const llvm::APSInt &getMinValue() const;
112-
/// Get a maximal value covered by the ranges in the set
270+
/// Get the maximal value covered by the ranges in the set.
271+
///
272+
/// Complexity: O(1)
113273
const llvm::APSInt &getMaxValue() const;
114274

115-
private:
116-
void IntersectInRange(BasicValueFactory &BV, Factory &F,
117-
const llvm::APSInt &Lower, const llvm::APSInt &Upper,
118-
PrimRangeSet &newRanges, PrimRangeSet::iterator &i,
119-
PrimRangeSet::iterator &e) const;
275+
/// Test whether the given point is contained by any of the ranges.
276+
///
277+
/// Complexity: O(logN)
278+
/// where N = size(this)
279+
bool contains(llvm::APSInt Point) const { return containsImpl(Point); }
280+
281+
void dump(raw_ostream &OS) const;
282+
283+
bool operator==(const RangeSet &Other) const { return *Impl == *Other.Impl; }
284+
bool operator!=(const RangeSet &Other) const { return !(*this == Other); }
120285

286+
private:
287+
/* implicit */ RangeSet(ContainerType *RawContainer) : Impl(RawContainer) {}
288+
/* implicit */ RangeSet(UnderlyingType Ptr) : Impl(Ptr) {}
289+
290+
/// Pin given points to the type represented by the current range set.
291+
///
292+
/// This makes the point parameters in-out parameters.
293+
/// In order to maintain consistent types across all of the ranges in the set
294+
/// and to keep all the operations to compare ONLY points of the same type, we
295+
/// need to pin every point before any operation.
296+
///
297+
/// @returns true if the given points can be converted to the target type
298+
/// without changing the values (i.e. trivially) and false otherwise.
299+
/// @{
121300
bool pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const;
301+
bool pin(llvm::APSInt &Point) const;
302+
/// @}
122303

123-
public:
124-
RangeSet Intersect(BasicValueFactory &BV, Factory &F, llvm::APSInt Lower,
125-
llvm::APSInt Upper) const;
126-
RangeSet Intersect(BasicValueFactory &BV, Factory &F,
127-
const RangeSet &Other) const;
128-
RangeSet Negate(BasicValueFactory &BV, Factory &F) const;
129-
RangeSet Delete(BasicValueFactory &BV, Factory &F,
130-
const llvm::APSInt &Point) const;
131-
132-
void print(raw_ostream &os) const;
133-
134-
bool operator==(const RangeSet &other) const {
135-
return ranges == other.ranges;
136-
}
304+
// This version of the function modifies its argument (pins it).
305+
bool containsImpl(llvm::APSInt &Point) const;
306+
307+
friend class Factory;
137308
};
138309

139310
using ConstraintMap = llvm::ImmutableMap<SymbolRef, RangeSet>;

0 commit comments

Comments
 (0)