Skip to content

Commit 432c425

Browse files
committed
[Support] Detect invalid formatv() calls
- Detect formatv() calls where the number of replacement parameters expected after parsing the format string does not match the number provides in the formatv() call. - assert() in debug builds, and fail the formatv() call in release builds by just emitting an error message in the stream.
1 parent 3a5c578 commit 432c425

File tree

7 files changed

+205
-58
lines changed

7 files changed

+205
-58
lines changed

clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1401,7 +1401,10 @@ void StdLibraryFunctionsChecker::checkPostCall(const CallEvent &Call,
14011401
ErrnoNote =
14021402
llvm::formatv("After calling '{0}' {1}", FunctionName, ErrnoNote);
14031403
} else {
1404-
CaseNote = llvm::formatv(Case.getNote().str().c_str(), FunctionName);
1404+
// Disable formatv() validation as the case note may not always have the
1405+
// {0} placeholder for function name.
1406+
CaseNote =
1407+
llvm::formatv(false, Case.getNote().str().c_str(), FunctionName);
14051408
}
14061409
const SVal RV = Call.getReturnValue();
14071410

llvm/benchmarks/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ set(LLVM_LINK_COMPONENTS
55
add_benchmark(DummyYAML DummyYAML.cpp PARTIAL_SOURCES_INTENDED)
66
add_benchmark(xxhash xxhash.cpp PARTIAL_SOURCES_INTENDED)
77
add_benchmark(GetIntrinsicForClangBuiltin GetIntrinsicForClangBuiltin.cpp PARTIAL_SOURCES_INTENDED)
8+
add_benchmark(FormatVariadicBM FormatVariadicBM.cpp PARTIAL_SOURCES_INTENDED)

llvm/benchmarks/FormatVariadicBM.cpp

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
//===- FormatVariadicBM.cpp - formatv() benchmark ---------- --------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "benchmark/benchmark.h"
10+
#include "llvm/Support/FormatVariadic.h"
11+
#include <algorithm>
12+
#include <string>
13+
#include <vector>
14+
15+
using namespace llvm;
16+
using namespace std;
17+
18+
// Generate a list of format strings that have `NumReplacements` replacements
19+
// by permuting the replacements and some literal text.
20+
static vector<string> getFormatStrings(int NumReplacements) {
21+
vector<string> Components;
22+
for (int I = 0; I < NumReplacements; I++)
23+
Components.push_back("{" + to_string(I) + "}");
24+
// Intersperse these with some other literal text (_).
25+
const string_view Literal = "____";
26+
for (char C : Literal)
27+
Components.push_back(string(1, C));
28+
29+
vector<string> Formats;
30+
do {
31+
string Concat;
32+
for (const string &C : Components)
33+
Concat += C;
34+
Formats.emplace_back(Concat);
35+
} while (next_permutation(Components.begin(), Components.end()));
36+
return Formats;
37+
}
38+
39+
// Generate the set of formats to exercise outside the benchmark code.
40+
static const vector<vector<string>> Formats = {
41+
getFormatStrings(1), getFormatStrings(2), getFormatStrings(3),
42+
getFormatStrings(4), getFormatStrings(5),
43+
};
44+
45+
// Benchmark formatv() for a variety of format strings and 1-5 replacements.
46+
static void BM_FormatVariadic(benchmark::State &state) {
47+
for (auto _ : state) {
48+
for (const string &Fmt : Formats[0])
49+
formatv(Fmt.c_str(), 1).str();
50+
for (const string &Fmt : Formats[1])
51+
formatv(Fmt.c_str(), 1, 2).str();
52+
for (const string &Fmt : Formats[2])
53+
formatv(Fmt.c_str(), 1, 2, 3).str();
54+
for (const string &Fmt : Formats[3])
55+
formatv(Fmt.c_str(), 1, 2, 3, 4).str();
56+
for (const string &Fmt : Formats[4])
57+
formatv(Fmt.c_str(), 1, 2, 3, 4, 5).str();
58+
}
59+
}
60+
61+
BENCHMARK(BM_FormatVariadic);
62+
63+
BENCHMARK_MAIN();

llvm/include/llvm/Support/FormatVariadic.h

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -67,23 +67,20 @@ class formatv_object_base {
6767
protected:
6868
StringRef Fmt;
6969
ArrayRef<support::detail::format_adapter *> Adapters;
70-
71-
static bool consumeFieldLayout(StringRef &Spec, AlignStyle &Where,
72-
size_t &Align, char &Pad);
73-
74-
static std::pair<ReplacementItem, StringRef>
75-
splitLiteralAndReplacement(StringRef Fmt);
70+
bool Validate;
7671

7772
formatv_object_base(StringRef Fmt,
78-
ArrayRef<support::detail::format_adapter *> Adapters)
79-
: Fmt(Fmt), Adapters(Adapters) {}
73+
ArrayRef<support::detail::format_adapter *> Adapters,
74+
bool Validate)
75+
: Fmt(Fmt), Adapters(Adapters), Validate(Validate) {}
8076

8177
formatv_object_base(formatv_object_base const &rhs) = delete;
8278
formatv_object_base(formatv_object_base &&rhs) = default;
8379

8480
public:
8581
void format(raw_ostream &S) const {
86-
for (auto &R : parseFormatString(Fmt)) {
82+
const auto Replacements = parseFormatString(Fmt, Adapters.size(), Validate);
83+
for (const auto &R : Replacements) {
8784
if (R.Type == ReplacementType::Empty)
8885
continue;
8986
if (R.Type == ReplacementType::Literal) {
@@ -101,9 +98,10 @@ class formatv_object_base {
10198
Align.format(S, R.Options);
10299
}
103100
}
104-
static SmallVector<ReplacementItem, 2> parseFormatString(StringRef Fmt);
105101

106-
static std::optional<ReplacementItem> parseReplacementItem(StringRef Spec);
102+
// Parse and optionally validate format string (in debug builds).
103+
static SmallVector<ReplacementItem, 2>
104+
parseFormatString(StringRef Fmt, size_t NumArgs, bool Validate);
107105

108106
std::string str() const {
109107
std::string Result;
@@ -149,8 +147,8 @@ template <typename Tuple> class formatv_object : public formatv_object_base {
149147
};
150148

151149
public:
152-
formatv_object(StringRef Fmt, Tuple &&Params)
153-
: formatv_object_base(Fmt, ParameterPointers),
150+
formatv_object(StringRef Fmt, Tuple &&Params, bool Validate)
151+
: formatv_object_base(Fmt, ParameterPointers, Validate),
154152
Parameters(std::move(Params)) {
155153
ParameterPointers = std::apply(create_adapters(), Parameters);
156154
}
@@ -247,15 +245,22 @@ template <typename Tuple> class formatv_object : public formatv_object_base {
247245
// assertion. Otherwise, it will try to do something reasonable, but in general
248246
// the details of what that is are undefined.
249247
//
248+
249+
// formatv() with validation enable/disable controlled by the first argument.
250250
template <typename... Ts>
251-
inline auto formatv(const char *Fmt, Ts &&...Vals)
251+
inline auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)
252252
-> formatv_object<decltype(std::make_tuple(
253253
support::detail::build_format_adapter(std::forward<Ts>(Vals))...))> {
254254
using ParamTuple = decltype(std::make_tuple(
255255
support::detail::build_format_adapter(std::forward<Ts>(Vals))...));
256-
return formatv_object<ParamTuple>(
257-
Fmt, std::make_tuple(support::detail::build_format_adapter(
258-
std::forward<Ts>(Vals))...));
256+
auto Params = std::make_tuple(
257+
support::detail::build_format_adapter(std::forward<Ts>(Vals))...);
258+
return formatv_object<ParamTuple>(Fmt, std::move(Params), Validate);
259+
}
260+
261+
// formatv() with validation enabled.
262+
template <typename... Ts> inline auto formatv(const char *Fmt, Ts &&...Vals) {
263+
return formatv<Ts...>(true, Fmt, std::forward<Ts>(Vals)...);
259264
}
260265

261266
} // end namespace llvm

llvm/lib/Support/FormatVariadic.cpp

Lines changed: 72 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ static std::optional<AlignStyle> translateLocChar(char C) {
2525
LLVM_BUILTIN_UNREACHABLE;
2626
}
2727

28-
bool formatv_object_base::consumeFieldLayout(StringRef &Spec, AlignStyle &Where,
29-
size_t &Align, char &Pad) {
28+
static bool consumeFieldLayout(StringRef &Spec, AlignStyle &Where,
29+
size_t &Align, char &Pad) {
3030
Where = AlignStyle::Right;
3131
Align = 0;
3232
Pad = ' ';
@@ -35,8 +35,7 @@ bool formatv_object_base::consumeFieldLayout(StringRef &Spec, AlignStyle &Where,
3535

3636
if (Spec.size() > 1) {
3737
// A maximum of 2 characters at the beginning can be used for something
38-
// other
39-
// than the width.
38+
// other than the width.
4039
// If Spec[1] is a loc char, then Spec[0] is a pad char and Spec[2:...]
4140
// contains the width.
4241
// Otherwise, if Spec[0] is a loc char, then Spec[1:...] contains the width.
@@ -55,8 +54,7 @@ bool formatv_object_base::consumeFieldLayout(StringRef &Spec, AlignStyle &Where,
5554
return !Failed;
5655
}
5756

58-
std::optional<ReplacementItem>
59-
formatv_object_base::parseReplacementItem(StringRef Spec) {
57+
static std::optional<ReplacementItem> parseReplacementItem(StringRef Spec) {
6058
StringRef RepString = Spec.trim("{}");
6159

6260
// If the replacement sequence does not start with a non-negative integer,
@@ -82,15 +80,14 @@ formatv_object_base::parseReplacementItem(StringRef Spec) {
8280
RepString = StringRef();
8381
}
8482
RepString = RepString.trim();
85-
if (!RepString.empty()) {
86-
assert(false && "Unexpected characters found in replacement string!");
87-
}
83+
assert(RepString.empty() &&
84+
"Unexpected characters found in replacement string!");
8885

8986
return ReplacementItem{Spec, Index, Align, Where, Pad, Options};
9087
}
9188

92-
std::pair<ReplacementItem, StringRef>
93-
formatv_object_base::splitLiteralAndReplacement(StringRef Fmt) {
89+
static std::pair<ReplacementItem, StringRef>
90+
splitLiteralAndReplacement(StringRef Fmt) {
9491
while (!Fmt.empty()) {
9592
// Everything up until the first brace is a literal.
9693
if (Fmt.front() != '{') {
@@ -143,15 +140,77 @@ formatv_object_base::splitLiteralAndReplacement(StringRef Fmt) {
143140
return std::make_pair(ReplacementItem{Fmt}, StringRef());
144141
}
145142

143+
#ifndef NDEBUG
144+
#define ENABLE_VALIDATION 1
145+
#else
146+
#define ENABLE_VALIDATION 0 // Conveniently enable validation in release mode.
147+
#endif
148+
146149
SmallVector<ReplacementItem, 2>
147-
formatv_object_base::parseFormatString(StringRef Fmt) {
150+
formatv_object_base::parseFormatString(StringRef Fmt, size_t NumArgs,
151+
bool Validate) {
148152
SmallVector<ReplacementItem, 2> Replacements;
149-
ReplacementItem I;
153+
154+
#if ENABLE_VALIDATION
155+
const StringRef SavedFmtStr = Fmt;
156+
size_t NumExpectedArgs = 0;
157+
#endif
158+
150159
while (!Fmt.empty()) {
160+
ReplacementItem I;
151161
std::tie(I, Fmt) = splitLiteralAndReplacement(Fmt);
152162
if (I.Type != ReplacementType::Empty)
153163
Replacements.push_back(I);
164+
#if ENABLE_VALIDATION
165+
if (I.Type == ReplacementType::Format)
166+
NumExpectedArgs = std::max(NumExpectedArgs, I.Index + 1);
167+
#endif
168+
}
169+
170+
#if ENABLE_VALIDATION
171+
if (!Validate)
172+
return Replacements;
173+
174+
// Perform additional validation. Verify that the number of arguments matches
175+
// the number of replacement indices and that there are no holes in the
176+
// replacement indices.
177+
178+
// When validation fails, return an array of replacement items that
179+
// will print an error message as the outout of this formatv() (used when
180+
// validation is enabled in release mode).
181+
auto getErrorReplacements = [SavedFmtStr](StringLiteral ErrorMsg) {
182+
return SmallVector<ReplacementItem, 2>{
183+
ReplacementItem("Invalid formatv() call: "), ReplacementItem(ErrorMsg),
184+
ReplacementItem(" for format string: "), ReplacementItem(SavedFmtStr)};
185+
};
186+
187+
if (NumExpectedArgs != NumArgs) {
188+
errs() << formatv(
189+
"Expected {0} Args, but got {1} for format string '{2}'\n",
190+
NumExpectedArgs, NumArgs, SavedFmtStr);
191+
assert(0 && "Invalid formatv() call");
192+
return getErrorReplacements("Unexpected number of arguments");
193+
}
194+
195+
// Find the number of unique indices seen. All replacement indices
196+
// are < NumExpectedArgs.
197+
SmallVector<bool> Indices(NumExpectedArgs);
198+
size_t Count = 0;
199+
for (const ReplacementItem &I : Replacements) {
200+
if (I.Type != ReplacementType::Format || Indices[I.Index])
201+
continue;
202+
Indices[I.Index] = true;
203+
++Count;
204+
}
205+
206+
if (Count != NumExpectedArgs) {
207+
errs() << formatv(
208+
"Replacement field indices cannot have holes for format string '{0}'\n",
209+
SavedFmtStr);
210+
assert(0 && "Invalid format string");
211+
return getErrorReplacements("Replacement indices have holes");
154212
}
213+
#endif // ENABLE_VALIDATION
155214
return Replacements;
156215
}
157216

0 commit comments

Comments
 (0)