// Patch overview, derived from the hunks below.
// NOTE(review): this summary sits ahead of the first "diff --git" header; confirm your patch tooling skips leading text before applying.
// - The hidden "long-string-literals" option (EmitLongStrLiterals) is removed from llvm/utils/TableGen/Basic/TableGen.cpp, defined in llvm/lib/TableGen/Main.cpp, and declared in llvm/include/llvm/TableGen/Main.h.
// - The StringToOffsetTable member function bodies (GetOrAddStringOffset, EmitStringTableDef, EmitString) move out of the header into the new llvm/lib/TableGen/StringToOffsetTable.cpp, which is added to the CMake and GN source lists.
// - EmitStringTableDef now emits a char-literal array only when EmitLongStrLiterals is false AND the aggregate string is longer than 64 * 1024 characters; previously the size check alone decided.
// - TableGen.cmake appends --long-string-literals=0 under MSVC for every project except MLIR_SRC_SHARDER (previously only for LLVM).
// - AsmMatcherEmitter.cpp and SDNodeInfoEmitter.cpp gain a direct FormatVariadic.h include; SequenceToOffsetTable.h includes llvm/TableGen/Main.h instead of declaring the option itself.
diff --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake index ffcc718b47775..bf914c379e809 100644 --- a/llvm/cmake/modules/TableGen.cmake +++ b/llvm/cmake/modules/TableGen.cmake @@ -68,7 +68,9 @@ function(tablegen project ofn) # char literals, instead. If we're cross-compiling, then conservatively assume # that the source might be consumed by MSVC. # [1] https://docs.microsoft.com/en-us/cpp/cpp/compiler-limits?view=vs-2017 - if (MSVC AND project STREQUAL LLVM) + # Don't pass this flag to mlir-src-sharder, since it doesn't support the + # flag, and it doesn't need it. + if (MSVC AND NOT "${project}" STREQUAL "MLIR_SRC_SHARDER") list(APPEND LLVM_TABLEGEN_FLAGS "--long-string-literals=0") endif() if (CMAKE_GENERATOR MATCHES "Visual Studio") diff --git a/llvm/include/llvm/TableGen/Main.h b/llvm/include/llvm/TableGen/Main.h index e8c60e2869902..5f68be188de78 100644 --- a/llvm/include/llvm/TableGen/Main.h +++ b/llvm/include/llvm/TableGen/Main.h @@ -13,6 +13,7 @@ #ifndef LLVM_TABLEGEN_MAIN_H #define LLVM_TABLEGEN_MAIN_H +#include "llvm/Support/CommandLine.h" #include namespace llvm { @@ -27,6 +28,10 @@ using TableGenMainFn = bool(raw_ostream &OS, const RecordKeeper &Records); int TableGenMain(const char *argv0, std::function MainFn = nullptr); +/// Controls emitting large character arrays as strings or character arrays. +/// Typically set to false when building with MSVC. 
+extern cl::opt EmitLongStrLiterals; + } // end namespace llvm #endif // LLVM_TABLEGEN_MAIN_H diff --git a/llvm/include/llvm/TableGen/StringToOffsetTable.h b/llvm/include/llvm/TableGen/StringToOffsetTable.h index e716411514bd6..21795644d4bd6 100644 --- a/llvm/include/llvm/TableGen/StringToOffsetTable.h +++ b/llvm/include/llvm/TableGen/StringToOffsetTable.h @@ -12,8 +12,6 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" -#include "llvm/Support/FormatVariadic.h" -#include "llvm/Support/raw_ostream.h" #include namespace llvm { @@ -36,17 +34,7 @@ class StringToOffsetTable { bool empty() const { return StringOffset.empty(); } size_t size() const { return AggregateString.size(); } - unsigned GetOrAddStringOffset(StringRef Str, bool appendZero = true) { - auto [II, Inserted] = StringOffset.insert({Str, size()}); - if (Inserted) { - // Add the string to the aggregate if this is the first time found. - AggregateString.append(Str.begin(), Str.end()); - if (appendZero) - AggregateString += '\0'; - } - - return II->second; - } + unsigned GetOrAddStringOffset(StringRef Str, bool appendZero = true); // Returns the offset of `Str` in the table if its preset, else return // std::nullopt. @@ -69,96 +57,10 @@ class StringToOffsetTable { // `static` and `constexpr`. Both `Name` and (`Name` + "Storage") must be // valid identifiers to declare. void EmitStringTableDef(raw_ostream &OS, const Twine &Name, - const Twine &Indent = "") const { - OS << formatv(R"( -#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Woverlength-strings" -#endif -{0}static constexpr char {1}Storage[] = )", - Indent, Name); - - // MSVC silently miscompiles string literals longer than 64k in some - // circumstances. When the string table is longer, emit it as an array of - // character literals. - bool UseChars = AggregateString.size() > (64 * 1024); - OS << (UseChars ? "{\n" : "\n"); - - llvm::ListSeparator LineSep(UseChars ? 
",\n" : "\n"); - llvm::SmallVector Strings(split(AggregateString, '\0')); - // We should always have an empty string at the start, and because these are - // null terminators rather than separators, we'll have one at the end as - // well. Skip the end one. - assert(Strings.front().empty() && "Expected empty initial string!"); - assert(Strings.back().empty() && - "Expected empty string at the end due to terminators!"); - Strings.pop_back(); - for (StringRef Str : Strings) { - OS << LineSep << Indent << " "; - // If we can, just emit this as a string literal to be concatenated. - if (!UseChars) { - OS << "\""; - OS.write_escaped(Str); - OS << "\\0\""; - continue; - } - - llvm::ListSeparator CharSep(", "); - for (char C : Str) { - OS << CharSep << "'"; - OS.write_escaped(StringRef(&C, 1)); - OS << "'"; - } - OS << CharSep << "'\\0'"; - } - OS << LineSep << Indent << (UseChars ? "};" : " ;"); - - OS << formatv(R"( -#ifdef __GNUC__ -#pragma GCC diagnostic pop -#endif - -{0}static constexpr llvm::StringTable {1} = -{0} {1}Storage; -)", - Indent, Name); - } + const Twine &Indent = "") const; // Emit the string as one single string. - void EmitString(raw_ostream &O) const { - // Escape the string. - SmallString<256> EscapedStr; - raw_svector_ostream(EscapedStr).write_escaped(AggregateString); - - O << " \""; - unsigned CharsPrinted = 0; - for (unsigned i = 0, e = EscapedStr.size(); i != e; ++i) { - if (CharsPrinted > 70) { - O << "\"\n \""; - CharsPrinted = 0; - } - O << EscapedStr[i]; - ++CharsPrinted; - - // Print escape sequences all together. 
- if (EscapedStr[i] != '\\') - continue; - - assert(i + 1 < EscapedStr.size() && "Incomplete escape sequence!"); - if (isDigit(EscapedStr[i + 1])) { - assert(isDigit(EscapedStr[i + 2]) && isDigit(EscapedStr[i + 3]) && - "Expected 3 digit octal escape!"); - O << EscapedStr[++i]; - O << EscapedStr[++i]; - O << EscapedStr[++i]; - CharsPrinted += 3; - } else { - O << EscapedStr[++i]; - ++CharsPrinted; - } - } - O << "\""; - } + void EmitString(raw_ostream &O) const; }; } // end namespace llvm diff --git a/llvm/lib/TableGen/CMakeLists.txt b/llvm/lib/TableGen/CMakeLists.txt index 84815c7736997..0f9284c8bb999 100644 --- a/llvm/lib/TableGen/CMakeLists.txt +++ b/llvm/lib/TableGen/CMakeLists.txt @@ -7,6 +7,7 @@ add_llvm_component_library(LLVMTableGen Record.cpp SetTheory.cpp StringMatcher.cpp + StringToOffsetTable.cpp TableGenBackend.cpp TableGenBackendSkeleton.cpp TGLexer.cpp diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp index 35600bf2f1f86..ea716215e0679 100644 --- a/llvm/lib/TableGen/Main.cpp +++ b/llvm/lib/TableGen/Main.cpp @@ -64,6 +64,15 @@ WriteIfChanged("write-if-changed", cl::desc("Only write output if it changed")); static cl::opt TimePhases("time-phases", cl::desc("Time phases of parser and backend")); +namespace llvm { +cl::opt EmitLongStrLiterals( + "long-string-literals", + cl::desc("when emitting large string tables, prefer string literals over " + "comma-separated char literals. 
This can be a readability and " + "compile-time performance win, but upsets some compilers"), + cl::Hidden, cl::init(true)); +} // end namespace llvm + static cl::opt NoWarnOnUnusedTemplateArgs( "no-warn-on-unused-template-args", cl::desc("Disable unused template argument warnings.")); diff --git a/llvm/lib/TableGen/StringToOffsetTable.cpp b/llvm/lib/TableGen/StringToOffsetTable.cpp new file mode 100644 index 0000000000000..d73b5749ad7d5 --- /dev/null +++ b/llvm/lib/TableGen/StringToOffsetTable.cpp @@ -0,0 +1,120 @@ +//===- StringToOffsetTable.cpp - Emit a big concatenated string -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/TableGen/StringToOffsetTable.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/TableGen/Main.h" + +using namespace llvm; + +unsigned StringToOffsetTable::GetOrAddStringOffset(StringRef Str, + bool appendZero) { + auto [II, Inserted] = StringOffset.insert({Str, size()}); + if (Inserted) { + // Add the string to the aggregate if this is the first time found. + AggregateString.append(Str.begin(), Str.end()); + if (appendZero) + AggregateString += '\0'; + } + + return II->second; +} + +void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS, const Twine &Name, + const Twine &Indent) const { + OS << formatv(R"( +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Woverlength-strings" +#endif +{0}static constexpr char {1}Storage[] = )", + Indent, Name); + + // MSVC silently miscompiles string literals longer than 64k in some + // circumstances. The build system sets EmitLongStrLiterals to false when it + // detects that it is targeting MSVC. 
When that option is false and the + // string table is longer than 64k, emit it as an array of character + // literals. + bool UseChars = !EmitLongStrLiterals && AggregateString.size() > (64 * 1024); + OS << (UseChars ? "{\n" : "\n"); + + llvm::ListSeparator LineSep(UseChars ? ",\n" : "\n"); + llvm::SmallVector Strings(split(AggregateString, '\0')); + // We should always have an empty string at the start, and because these are + // null terminators rather than separators, we'll have one at the end as + // well. Skip the end one. + assert(Strings.front().empty() && "Expected empty initial string!"); + assert(Strings.back().empty() && + "Expected empty string at the end due to terminators!"); + Strings.pop_back(); + for (StringRef Str : Strings) { + OS << LineSep << Indent << " "; + // If we can, just emit this as a string literal to be concatenated. + if (!UseChars) { + OS << "\""; + OS.write_escaped(Str); + OS << "\\0\""; + continue; + } + + llvm::ListSeparator CharSep(", "); + for (char C : Str) { + OS << CharSep << "'"; + OS.write_escaped(StringRef(&C, 1)); + OS << "'"; + } + OS << CharSep << "'\\0'"; + } + OS << LineSep << Indent << (UseChars ? "};" : " ;"); + + OS << formatv(R"( +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + +{0}static constexpr llvm::StringTable {1} = +{0} {1}Storage; +)", + Indent, Name); +} + +void StringToOffsetTable::EmitString(raw_ostream &O) const { + // Escape the string. + SmallString<256> EscapedStr; + raw_svector_ostream(EscapedStr).write_escaped(AggregateString); + + O << " \""; + unsigned CharsPrinted = 0; + for (unsigned i = 0, e = EscapedStr.size(); i != e; ++i) { + if (CharsPrinted > 70) { + O << "\"\n \""; + CharsPrinted = 0; + } + O << EscapedStr[i]; + ++CharsPrinted; + + // Print escape sequences all together. 
+ if (EscapedStr[i] != '\\') + continue; + + assert(i + 1 < EscapedStr.size() && "Incomplete escape sequence!"); + if (isDigit(EscapedStr[i + 1])) { + assert(isDigit(EscapedStr[i + 2]) && isDigit(EscapedStr[i + 3]) && + "Expected 3 digit octal escape!"); + O << EscapedStr[++i]; + O << EscapedStr[++i]; + O << EscapedStr[++i]; + CharsPrinted += 3; + } else { + O << EscapedStr[++i]; + ++CharsPrinted; + } + } + O << "\""; +} diff --git a/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/llvm/utils/TableGen/AsmMatcherEmitter.cpp index 24822c847046d..c954163cdeb3a 100644 --- a/llvm/utils/TableGen/AsmMatcherEmitter.cpp +++ b/llvm/utils/TableGen/AsmMatcherEmitter.cpp @@ -110,6 +110,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/StringMatcher.h" diff --git a/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h b/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h index 35a9abdc37c82..8da6fbef0672e 100644 --- a/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h +++ b/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h @@ -15,15 +15,14 @@ #define LLVM_UTILS_TABLEGEN_BASIC_SEQUENCETOOFFSETTABLE_H #include "llvm/ADT/StringExtras.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/TableGen/Main.h" #include #include #include #include namespace llvm { -extern cl::opt EmitLongStrLiterals; inline void printChar(raw_ostream &OS, char C) { unsigned char UC(C); diff --git a/llvm/utils/TableGen/Basic/TableGen.cpp b/llvm/utils/TableGen/Basic/TableGen.cpp index 80ac93f2b54fb..edb7791500699 100644 --- a/llvm/utils/TableGen/Basic/TableGen.cpp +++ b/llvm/utils/TableGen/Basic/TableGen.cpp @@ -26,15 +26,6 @@ using namespace llvm; -namespace llvm { -cl::opt EmitLongStrLiterals( - "long-string-literals", - cl::desc("when emitting large string tables, prefer string 
literals over " - "comma-separated char literals. This can be a readability and " - "compile-time performance win, but upsets some compilers"), - cl::Hidden, cl::init(true)); -} // end namespace llvm - static cl::OptionCategory PrintEnumsCat("Options for -print-enums"); static cl::opt Class("class", cl::desc("Print Enum list for this class"), diff --git a/llvm/utils/TableGen/SDNodeInfoEmitter.cpp b/llvm/utils/TableGen/SDNodeInfoEmitter.cpp index 63ee0deb87110..64f03dae83e7d 100644 --- a/llvm/utils/TableGen/SDNodeInfoEmitter.cpp +++ b/llvm/utils/TableGen/SDNodeInfoEmitter.cpp @@ -9,6 +9,7 @@ #include "Basic/SequenceToOffsetTable.h" #include "Common/CodeGenDAGPatterns.h" // For SDNodeInfo. #include "llvm/Support/CommandLine.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/StringToOffsetTable.h" #include "llvm/TableGen/TableGenBackend.h" diff --git a/llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn index d90df7bc0e57a..b40fdf154b01a 100644 --- a/llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/TableGen/BUILD.gn @@ -10,6 +10,7 @@ static_library("TableGen") { "Record.cpp", "SetTheory.cpp", "StringMatcher.cpp", + "StringToOffsetTable.cpp", "TGLexer.cpp", "TGParser.cpp", "TGTimer.cpp",