Skip to content

Commit 0a27c4e

Browse files
authored
[StrTable] Use string literal emission for intrinsics on non-MSVC platforms (#124856)
This mainly transitions the LLVM intrinsic string table from character emission to string literal emission, which I confirmed happens for me locally. I moved the guts of StringToOffsetTable to a cpp file so I could move the `EmitLongStrLiterals` cl::opt global to a non-vague linkage home in the `TableGen` library. I had to add missing FormatVariadic.h includes to account for moving other includes to a cpp file.
1 parent 30ae47e commit 0a27c4e

File tree

11 files changed

+145
-113
lines changed

11 files changed

+145
-113
lines changed

llvm/cmake/modules/TableGen.cmake

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,9 @@ function(tablegen project ofn)
6868
# char literals, instead. If we're cross-compiling, then conservatively assume
6969
# that the source might be consumed by MSVC.
7070
# [1] https://docs.microsoft.com/en-us/cpp/cpp/compiler-limits?view=vs-2017
71-
if (MSVC AND project STREQUAL LLVM)
71+
# Don't pass this flag to mlir-src-sharder, since it doesn't support the
72+
# flag, and it doesn't need it.
73+
if (MSVC AND NOT "${project}" STREQUAL "MLIR_SRC_SHARDER")
7274
list(APPEND LLVM_TABLEGEN_FLAGS "--long-string-literals=0")
7375
endif()
7476
if (CMAKE_GENERATOR MATCHES "Visual Studio")

llvm/include/llvm/TableGen/Main.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#ifndef LLVM_TABLEGEN_MAIN_H
1414
#define LLVM_TABLEGEN_MAIN_H
1515

16+
#include "llvm/Support/CommandLine.h"
1617
#include <functional>
1718

1819
namespace llvm {
@@ -27,6 +28,10 @@ using TableGenMainFn = bool(raw_ostream &OS, const RecordKeeper &Records);
2728
int TableGenMain(const char *argv0,
2829
std::function<TableGenMainFn> MainFn = nullptr);
2930

31+
/// Controls whether large string tables are emitted as string literals or as
/// comma-separated character literals.
32+
/// Typically set to false when building with MSVC.
33+
extern cl::opt<bool> EmitLongStrLiterals;
34+
3035
} // end namespace llvm
3136

3237
#endif // LLVM_TABLEGEN_MAIN_H

llvm/include/llvm/TableGen/StringToOffsetTable.h

Lines changed: 3 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@
1212
#include "llvm/ADT/SmallString.h"
1313
#include "llvm/ADT/StringExtras.h"
1414
#include "llvm/ADT/StringMap.h"
15-
#include "llvm/Support/FormatVariadic.h"
16-
#include "llvm/Support/raw_ostream.h"
1715
#include <optional>
1816

1917
namespace llvm {
@@ -36,17 +34,7 @@ class StringToOffsetTable {
3634
bool empty() const { return StringOffset.empty(); }
3735
size_t size() const { return AggregateString.size(); }
3836

39-
unsigned GetOrAddStringOffset(StringRef Str, bool appendZero = true) {
40-
auto [II, Inserted] = StringOffset.insert({Str, size()});
41-
if (Inserted) {
42-
// Add the string to the aggregate if this is the first time found.
43-
AggregateString.append(Str.begin(), Str.end());
44-
if (appendZero)
45-
AggregateString += '\0';
46-
}
47-
48-
return II->second;
49-
}
37+
unsigned GetOrAddStringOffset(StringRef Str, bool appendZero = true);
5038

5139
// Returns the offset of `Str` in the table if it's present, else return
5240
// std::nullopt.
@@ -69,96 +57,10 @@ class StringToOffsetTable {
6957
// `static` and `constexpr`. Both `Name` and (`Name` + "Storage") must be
7058
// valid identifiers to declare.
7159
void EmitStringTableDef(raw_ostream &OS, const Twine &Name,
72-
const Twine &Indent = "") const {
73-
OS << formatv(R"(
74-
#ifdef __GNUC__
75-
#pragma GCC diagnostic push
76-
#pragma GCC diagnostic ignored "-Woverlength-strings"
77-
#endif
78-
{0}static constexpr char {1}Storage[] = )",
79-
Indent, Name);
80-
81-
// MSVC silently miscompiles string literals longer than 64k in some
82-
// circumstances. When the string table is longer, emit it as an array of
83-
// character literals.
84-
bool UseChars = AggregateString.size() > (64 * 1024);
85-
OS << (UseChars ? "{\n" : "\n");
86-
87-
llvm::ListSeparator LineSep(UseChars ? ",\n" : "\n");
88-
llvm::SmallVector<StringRef> Strings(split(AggregateString, '\0'));
89-
// We should always have an empty string at the start, and because these are
90-
// null terminators rather than separators, we'll have one at the end as
91-
// well. Skip the end one.
92-
assert(Strings.front().empty() && "Expected empty initial string!");
93-
assert(Strings.back().empty() &&
94-
"Expected empty string at the end due to terminators!");
95-
Strings.pop_back();
96-
for (StringRef Str : Strings) {
97-
OS << LineSep << Indent << " ";
98-
// If we can, just emit this as a string literal to be concatenated.
99-
if (!UseChars) {
100-
OS << "\"";
101-
OS.write_escaped(Str);
102-
OS << "\\0\"";
103-
continue;
104-
}
105-
106-
llvm::ListSeparator CharSep(", ");
107-
for (char C : Str) {
108-
OS << CharSep << "'";
109-
OS.write_escaped(StringRef(&C, 1));
110-
OS << "'";
111-
}
112-
OS << CharSep << "'\\0'";
113-
}
114-
OS << LineSep << Indent << (UseChars ? "};" : " ;");
115-
116-
OS << formatv(R"(
117-
#ifdef __GNUC__
118-
#pragma GCC diagnostic pop
119-
#endif
120-
121-
{0}static constexpr llvm::StringTable {1} =
122-
{0} {1}Storage;
123-
)",
124-
Indent, Name);
125-
}
60+
const Twine &Indent = "") const;
12661

12762
// Emit the string as one single string.
128-
void EmitString(raw_ostream &O) const {
129-
// Escape the string.
130-
SmallString<256> EscapedStr;
131-
raw_svector_ostream(EscapedStr).write_escaped(AggregateString);
132-
133-
O << " \"";
134-
unsigned CharsPrinted = 0;
135-
for (unsigned i = 0, e = EscapedStr.size(); i != e; ++i) {
136-
if (CharsPrinted > 70) {
137-
O << "\"\n \"";
138-
CharsPrinted = 0;
139-
}
140-
O << EscapedStr[i];
141-
++CharsPrinted;
142-
143-
// Print escape sequences all together.
144-
if (EscapedStr[i] != '\\')
145-
continue;
146-
147-
assert(i + 1 < EscapedStr.size() && "Incomplete escape sequence!");
148-
if (isDigit(EscapedStr[i + 1])) {
149-
assert(isDigit(EscapedStr[i + 2]) && isDigit(EscapedStr[i + 3]) &&
150-
"Expected 3 digit octal escape!");
151-
O << EscapedStr[++i];
152-
O << EscapedStr[++i];
153-
O << EscapedStr[++i];
154-
CharsPrinted += 3;
155-
} else {
156-
O << EscapedStr[++i];
157-
++CharsPrinted;
158-
}
159-
}
160-
O << "\"";
161-
}
63+
void EmitString(raw_ostream &O) const;
16264
};
16365

16466
} // end namespace llvm

llvm/lib/TableGen/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ add_llvm_component_library(LLVMTableGen
77
Record.cpp
88
SetTheory.cpp
99
StringMatcher.cpp
10+
StringToOffsetTable.cpp
1011
TableGenBackend.cpp
1112
TableGenBackendSkeleton.cpp
1213
TGLexer.cpp

llvm/lib/TableGen/Main.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,15 @@ WriteIfChanged("write-if-changed", cl::desc("Only write output if it changed"));
6464
static cl::opt<bool>
6565
TimePhases("time-phases", cl::desc("Time phases of parser and backend"));
6666

67+
namespace llvm {
68+
cl::opt<bool> EmitLongStrLiterals(
69+
"long-string-literals",
70+
cl::desc("when emitting large string tables, prefer string literals over "
71+
"comma-separated char literals. This can be a readability and "
72+
"compile-time performance win, but upsets some compilers"),
73+
cl::Hidden, cl::init(true));
74+
} // end namespace llvm
75+
6776
static cl::opt<bool> NoWarnOnUnusedTemplateArgs(
6877
"no-warn-on-unused-template-args",
6978
cl::desc("Disable unused template argument warnings."));
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
//===- StringToOffsetTable.cpp - Emit a big concatenated string -*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "llvm/TableGen/StringToOffsetTable.h"
10+
#include "llvm/Support/FormatVariadic.h"
11+
#include "llvm/Support/raw_ostream.h"
12+
#include "llvm/TableGen/Main.h"
13+
14+
using namespace llvm;
15+
16+
unsigned StringToOffsetTable::GetOrAddStringOffset(StringRef Str,
17+
bool appendZero) {
18+
auto [II, Inserted] = StringOffset.insert({Str, size()});
19+
if (Inserted) {
20+
// Add the string to the aggregate if this is the first time found.
21+
AggregateString.append(Str.begin(), Str.end());
22+
if (appendZero)
23+
AggregateString += '\0';
24+
}
25+
26+
return II->second;
27+
}
28+
29+
void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS, const Twine &Name,
30+
const Twine &Indent) const {
31+
OS << formatv(R"(
32+
#ifdef __GNUC__
33+
#pragma GCC diagnostic push
34+
#pragma GCC diagnostic ignored "-Woverlength-strings"
35+
#endif
36+
{0}static constexpr char {1}Storage[] = )",
37+
Indent, Name);
38+
39+
// MSVC silently miscompiles string literals longer than 64k in some
40+
// circumstances. The build system sets EmitLongStrLiterals to false when it
41+
// detects that it is targeting MSVC. When that option is false and the
42+
// string table is longer than 64k, emit it as an array of character
43+
// literals.
44+
bool UseChars = !EmitLongStrLiterals && AggregateString.size() > (64 * 1024);
45+
OS << (UseChars ? "{\n" : "\n");
46+
47+
llvm::ListSeparator LineSep(UseChars ? ",\n" : "\n");
48+
llvm::SmallVector<StringRef> Strings(split(AggregateString, '\0'));
49+
// We should always have an empty string at the start, and because these are
50+
// null terminators rather than separators, we'll have one at the end as
51+
// well. Skip the end one.
52+
assert(Strings.front().empty() && "Expected empty initial string!");
53+
assert(Strings.back().empty() &&
54+
"Expected empty string at the end due to terminators!");
55+
Strings.pop_back();
56+
for (StringRef Str : Strings) {
57+
OS << LineSep << Indent << " ";
58+
// If we can, just emit this as a string literal to be concatenated.
59+
if (!UseChars) {
60+
OS << "\"";
61+
OS.write_escaped(Str);
62+
OS << "\\0\"";
63+
continue;
64+
}
65+
66+
llvm::ListSeparator CharSep(", ");
67+
for (char C : Str) {
68+
OS << CharSep << "'";
69+
OS.write_escaped(StringRef(&C, 1));
70+
OS << "'";
71+
}
72+
OS << CharSep << "'\\0'";
73+
}
74+
OS << LineSep << Indent << (UseChars ? "};" : " ;");
75+
76+
OS << formatv(R"(
77+
#ifdef __GNUC__
78+
#pragma GCC diagnostic pop
79+
#endif
80+
81+
{0}static constexpr llvm::StringTable {1} =
82+
{0} {1}Storage;
83+
)",
84+
Indent, Name);
85+
}
86+
87+
void StringToOffsetTable::EmitString(raw_ostream &O) const {
88+
// Escape the string.
89+
SmallString<256> EscapedStr;
90+
raw_svector_ostream(EscapedStr).write_escaped(AggregateString);
91+
92+
O << " \"";
93+
unsigned CharsPrinted = 0;
94+
for (unsigned i = 0, e = EscapedStr.size(); i != e; ++i) {
95+
if (CharsPrinted > 70) {
96+
O << "\"\n \"";
97+
CharsPrinted = 0;
98+
}
99+
O << EscapedStr[i];
100+
++CharsPrinted;
101+
102+
// Print escape sequences all together.
103+
if (EscapedStr[i] != '\\')
104+
continue;
105+
106+
assert(i + 1 < EscapedStr.size() && "Incomplete escape sequence!");
107+
if (isDigit(EscapedStr[i + 1])) {
108+
assert(isDigit(EscapedStr[i + 2]) && isDigit(EscapedStr[i + 3]) &&
109+
"Expected 3 digit octal escape!");
110+
O << EscapedStr[++i];
111+
O << EscapedStr[++i];
112+
O << EscapedStr[++i];
113+
CharsPrinted += 3;
114+
} else {
115+
O << EscapedStr[++i];
116+
++CharsPrinted;
117+
}
118+
}
119+
O << "\"";
120+
}

llvm/utils/TableGen/AsmMatcherEmitter.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@
110110
#include "llvm/Support/CommandLine.h"
111111
#include "llvm/Support/Debug.h"
112112
#include "llvm/Support/ErrorHandling.h"
113+
#include "llvm/Support/FormatVariadic.h"
113114
#include "llvm/TableGen/Error.h"
114115
#include "llvm/TableGen/Record.h"
115116
#include "llvm/TableGen/StringMatcher.h"

llvm/utils/TableGen/Basic/SequenceToOffsetTable.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,14 @@
1515
#define LLVM_UTILS_TABLEGEN_BASIC_SEQUENCETOOFFSETTABLE_H
1616

1717
#include "llvm/ADT/StringExtras.h"
18-
#include "llvm/Support/CommandLine.h"
1918
#include "llvm/Support/raw_ostream.h"
19+
#include "llvm/TableGen/Main.h"
2020
#include <algorithm>
2121
#include <cassert>
2222
#include <functional>
2323
#include <map>
2424

2525
namespace llvm {
26-
extern cl::opt<bool> EmitLongStrLiterals;
2726

2827
inline void printChar(raw_ostream &OS, char C) {
2928
unsigned char UC(C);

llvm/utils/TableGen/Basic/TableGen.cpp

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,6 @@
2626

2727
using namespace llvm;
2828

29-
namespace llvm {
30-
cl::opt<bool> EmitLongStrLiterals(
31-
"long-string-literals",
32-
cl::desc("when emitting large string tables, prefer string literals over "
33-
"comma-separated char literals. This can be a readability and "
34-
"compile-time performance win, but upsets some compilers"),
35-
cl::Hidden, cl::init(true));
36-
} // end namespace llvm
37-
3829
static cl::OptionCategory PrintEnumsCat("Options for -print-enums");
3930
static cl::opt<std::string> Class("class",
4031
cl::desc("Print Enum list for this class"),

llvm/utils/TableGen/SDNodeInfoEmitter.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "Basic/SequenceToOffsetTable.h"
1010
#include "Common/CodeGenDAGPatterns.h" // For SDNodeInfo.
1111
#include "llvm/Support/CommandLine.h"
12+
#include "llvm/Support/FormatVariadic.h"
1213
#include "llvm/TableGen/Error.h"
1314
#include "llvm/TableGen/StringToOffsetTable.h"
1415
#include "llvm/TableGen/TableGenBackend.h"

llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ static_library("TableGen") {
1010
"Record.cpp",
1111
"SetTheory.cpp",
1212
"StringMatcher.cpp",
13+
"StringToOffsetTable.cpp",
1314
"TGLexer.cpp",
1415
"TGParser.cpp",
1516
"TGTimer.cpp",

0 commit comments

Comments
 (0)