Skip to content

Commit 79dce85

Browse files
authored
[ML] Warn and error level log throttling (#1615)
This introduces a simple mechanism to throttle logging at WARN and ERROR level per process. In particular, it 1. Always outputs the first WARN or ERROR for a particular log line 2. Thereafter limits to outputting a duplicate line once per hour (wall clock) This should be enough to allow us to check for presence of errors (useful for QA). Also, nearly always, only the first instance of an error is useful.
1 parent 3c88511 commit 79dce85

File tree

10 files changed

+337
-52
lines changed

10 files changed

+337
-52
lines changed

docs/CHANGELOG.asciidoc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
unnecessary computation. (See {ml-pull}1537[#1537].)
5656
* Improvements to time series modeling particularly in relation to adaption to change.
5757
(See {ml-pull})1614[#1614].)
58+
* Warn and error log throttling. (See {ml-pull}1615[#1615].)
5859

5960
=== Bug Fixes
6061

include/core/CLogger.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#ifndef INCLUDED_ml_core_CLogger_h
77
#define INCLUDED_ml_core_CLogger_h
88

9+
#include <core/CLoggerThrottler.h>
910
#include <core/CNamedPipeFactory.h>
1011
#include <core/CNonCopyable.h>
1112
#include <core/ImportExport.h>
@@ -160,6 +161,9 @@ class CORE_EXPORT CLogger : private CNonCopyable {
160161
//! CLogger is a singleton, so we can not just create new instances
161162
void reset();
162163

164+
//! Get the object which performs log throttling.
165+
CLoggerThrottler& throttler();
166+
163167
private:
164168
//! Constructor for a singleton is private.
165169
CLogger();
@@ -193,6 +197,9 @@ class CORE_EXPORT CLogger : private CNonCopyable {
193197

194198
//! The default handler for fatal errors.
195199
TFatalErrorHandler m_FatalErrorHandler;
200+
201+
//! The log throttler.
202+
CLoggerThrottler m_Throttler;
196203
};
197204

198205
CORE_EXPORT std::ostream& operator<<(std::ostream& strm, CLogger::ELevel level);

include/core/CLoggerThrottler.h

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
7+
#ifndef INCLUDED_ml_core_CLoggerThrottler_h
8+
#define INCLUDED_ml_core_CLoggerThrottler_h
9+
10+
#include <core/ImportExport.h>
11+
12+
#include <boost/unordered_map.hpp>
13+
14+
#include <atomic>
15+
#include <cstdint>
16+
#include <mutex>
17+
18+
namespace ml {
19+
namespace core {
20+
21+
//! \brief Implements per log line throttling.
22+
//!
23+
//! DESCRIPTION:\n
24+
//! This implements global throttling for a program per line log line. By default
25+
//! the same log line will only be emitted once per hour.
26+
//!
27+
//! IMPLEMENTATION:\n
28+
//! This is thread safe but uses a very simple strategy: all accesses to a single
29+
//! hash map are sychronised. We assume that log throttling is only applied to
30+
//! messages which normally occur infrequently; for example, this is only currently
31+
//! applied to WARN and ERROR level logging (see LogMacros.h). So there will be
32+
//! little contention. Furthermore, the overhead of locking and unlocking the mutex
33+
//! should be neglible compared to the work done if the log line were actually
34+
//! emitted. So this should actually give a significant performance improvement
35+
//! if a log line is spamming.
36+
class CORE_EXPORT CLoggerThrottler {
37+
public:
38+
CLoggerThrottler();
39+
CLoggerThrottler(const CLoggerThrottler&) = delete;
40+
CLoggerThrottler& operator=(const CLoggerThrottler&) = delete;
41+
42+
//! Set the minimum interval between repeated log messages.
43+
//!
44+
//! \note This expects the interval in milliseconds.
45+
void minimumLogIntervalMs(std::int64_t minimumLogIntervalMs);
46+
47+
//! Should we skip logging of \p line in \p file?
48+
//!
49+
//! \param[in] file The file containing the log line.
50+
//! \param[in] line The log line.
51+
//! \return A pair comprising the count of log lines since the log line was
52+
//! last output (skip returned false) and whether to output the line or not.
53+
//! \note This is expected to be used in conjuction with __FILE__ and __LINE__
54+
//! macros although any unique identifiers which are a string literal and an
55+
//! integer are permitted.
56+
//! \warning We make no attempt to deduplicate logging from the same line for
57+
//! which __FILE__ resolves to a different string literal; this might conceivably
58+
//! happen for logging in a header. This is not expected to be important (it
59+
//! might result in slightly more logging) and it is possible to work around
60+
//! this by explicitly wrapping the logging, for example:
61+
//! \code
62+
//! if (core::CLogger::instance().throttler().skip("my unique string", 0) == false) {
63+
//! LOG_ERROR(<< ...)
64+
//! }
65+
//! \endcode
66+
std::pair<std::size_t, bool> skip(const char* file, int line);
67+
68+
private:
69+
using TConstCharPtrIntPr = std::pair<const char*, int>;
70+
using TInt64SizePr = std::pair<std::int64_t, std::size_t>;
71+
using TConstCharPtrIntPrInt64SizePrUMap =
72+
boost::unordered_map<TConstCharPtrIntPr, TInt64SizePr>;
73+
74+
private:
75+
std::int64_t m_MinimumLogIntervalMs;
76+
std::mutex m_Mutex;
77+
TConstCharPtrIntPrInt64SizePrUMap m_LastLogTimesAndCounts;
78+
};
79+
}
80+
}
81+
82+
#endif // INCLUDED_ml_core_CLoggerThrottler_h

include/core/LogMacros.h

Lines changed: 33 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,10 @@
1212
#include <boost/log/sources/severity_logger.hpp>
1313
#include <boost/log/utility/manipulators/add_value.hpp>
1414

15+
#include <cstddef>
1516
#include <sstream>
17+
#include <string>
18+
#include <tuple>
1619

1720
// Location info
1821
#ifdef LOG_LOCATION_INFO
@@ -73,25 +76,46 @@
7376
#ifdef LOG_WARN
7477
#undef LOG_WARN
7578
#endif
76-
#define LOG_WARN(message) \
77-
BOOST_LOG_STREAM_SEV(ml::core::CLogger::instance().logger(), ml::core::CLogger::E_Warn) \
78-
LOG_LOCATION_INFO \
79-
message
79+
#define LOG_WARN(message) \
80+
{ \
81+
std::size_t countOfWarnMessages; \
82+
bool skipWarnMessage; \
83+
std::tie(countOfWarnMessages, skipWarnMessage) = \
84+
ml::core::CLogger::instance().throttler().skip(__FILE__, __LINE__); \
85+
if (skipWarnMessage == false) { \
86+
BOOST_LOG_STREAM_SEV(ml::core::CLogger::instance().logger(), \
87+
ml::core::CLogger::E_Warn) \
88+
LOG_LOCATION_INFO \
89+
message << (countOfWarnMessages > 1 \
90+
? " | repeated [" + std::to_string(countOfWarnMessages) + "]" \
91+
: ""); \
92+
} \
93+
}
8094
#ifdef LOG_ERROR
8195
#undef LOG_ERROR
8296
#endif
83-
#define LOG_ERROR(message) \
84-
BOOST_LOG_STREAM_SEV(ml::core::CLogger::instance().logger(), ml::core::CLogger::E_Error) \
85-
LOG_LOCATION_INFO \
86-
message
97+
#define LOG_ERROR(message) \
98+
{ \
99+
std::size_t countOfErrorMessages; \
100+
bool skipErrorMessage; \
101+
std::tie(countOfErrorMessages, skipErrorMessage) = \
102+
ml::core::CLogger::instance().throttler().skip(__FILE__, __LINE__); \
103+
if (skipErrorMessage == false) { \
104+
BOOST_LOG_STREAM_SEV(ml::core::CLogger::instance().logger(), \
105+
ml::core::CLogger::E_Error) \
106+
LOG_LOCATION_INFO \
107+
message << (countOfErrorMessages > 1 \
108+
? " | repeated [" + std::to_string(countOfErrorMessages) + "]" \
109+
: ""); \
110+
} \
111+
}
87112
#ifdef LOG_FATAL
88113
#undef LOG_FATAL
89114
#endif
90115
#define LOG_FATAL(message) \
91116
BOOST_LOG_STREAM_SEV(ml::core::CLogger::instance().logger(), ml::core::CLogger::E_Fatal) \
92117
LOG_LOCATION_INFO \
93118
message
94-
95119
#ifdef HANDLE_FATAL
96120
#undef HANDLE_FATAL
97121
#endif

lib/core/CLogger.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,10 @@ void CLogger::reset() {
177177
m_Reconfigured.store(false);
178178
}
179179

180+
CLoggerThrottler& CLogger::throttler() {
181+
return m_Throttler;
182+
}
183+
180184
CLogger& CLogger::instance() {
181185
static CLogger instance;
182186
return instance;

lib/core/CLoggerThrottler.cc

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
7+
#include <core/CLoggerThrottler.h>
8+
9+
#include <core/CTimeUtils.h>
10+
11+
#include <limits>
12+
13+
namespace ml {
14+
namespace core {
15+
16+
CLoggerThrottler::CLoggerThrottler() : m_MinimumLogIntervalMs{3600 * 1000} {
17+
}
18+
19+
void CLoggerThrottler::minimumLogIntervalMs(std::int64_t minimumLogIntervalMs) {
20+
m_MinimumLogIntervalMs = minimumLogIntervalMs;
21+
}
22+
23+
std::pair<std::size_t, bool> CLoggerThrottler::skip(const char* file, int line) {
24+
25+
auto key = std::make_pair(file, line);
26+
auto now = CTimeUtils::nowMs();
27+
auto value = std::make_pair(std::numeric_limits<std::int64_t>::min(), 0);
28+
29+
// We have to hold the lock while updating the map entry because the same log
30+
// line may be triggered from different threads and we don't try and update
31+
// the last time and count atomically.
32+
33+
std::unique_lock<std::mutex> lock{m_Mutex};
34+
auto& valueRef = m_LastLogTimesAndCounts.emplace(key, value).first->second;
35+
std::size_t count{valueRef.second + 1};
36+
37+
if (now >= valueRef.first + m_MinimumLogIntervalMs) {
38+
valueRef = std::make_pair(now, 0);
39+
return std::make_pair(count, false);
40+
}
41+
++valueRef.second;
42+
return std::make_pair(count, true);
43+
}
44+
}
45+
}

lib/core/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ CJsonOutputStreamWrapper.cc \
8787
CJsonStatePersistInserter.cc \
8888
CJsonStateRestoreTraverser.cc \
8989
CLogger.cc \
90+
CLoggerThrottler.cc \
9091
CLoopProgress.cc \
9192
CMemory.cc \
9293
CMemoryUsage.cc \

0 commit comments

Comments
 (0)