Skip to content

Commit f3e13ef

Browse files
committed
Enable a mode in the parser in which it inspects alternative token choices to mutate test cases
The basic idea is that the parser records which `TokenSpec`s it checked for at individual offsets in the source. We can then use that information to generate new, interesting test cases by replacing a token with one that matches one of the `TokenSpec`s we checked for at that offset. This technique has found 11 bugs in the parser and I’m expecting it to find quite a few more once we assert that tokens have one of the expected kinds. Gathering this information is hidden behind a conditional compilation flag because merely checking whether we want to record alternative token choices causes a 6% performance regression, and the recorded information doesn’t provide any value except when we are running SwiftParserTest.
1 parent e92c961 commit f3e13ef

10 files changed

+257
-55
lines changed

Package.swift

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,13 @@ if ProcessInfo.processInfo.environment["SWIFTSYNTAX_ENABLE_RAWSYNTAX_VALIDATION"
2828
]
2929
}
3030

31+
// Swift settings handed to the targets that reference
// `swiftParserSwiftSettings`. Alternate token introspection is opt-in: the
// compilation condition is only defined when the environment variable of the
// same name is set while this manifest is evaluated.
var swiftParserSwiftSettings: [SwiftSetting] = []
if ProcessInfo.processInfo.environment["SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION"] != nil {
  swiftParserSwiftSettings.append(.define("SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION"))
}
37+
3138
let package = Package(
3239
name: "SwiftSyntax",
3340
platforms: [
@@ -103,7 +110,8 @@ let package = Package(
103110
exclude: [
104111
"CMakeLists.txt",
105112
"README.md",
106-
]
113+
],
114+
swiftSettings: swiftParserSwiftSettings
107115
),
108116
.target(
109117
name: "SwiftParserDiagnostics",
@@ -196,7 +204,8 @@ let package = Package(
196204
dependencies: [
197205
"SwiftDiagnostics", "SwiftOperators", "SwiftParser",
198206
"_SwiftSyntaxTestSupport", "SwiftSyntaxBuilder",
199-
]
207+
],
208+
swiftSettings: swiftParserSwiftSettings
200209
),
201210
.testTarget(
202211
name: "SwiftParserDiagnosticsTest",

Sources/SwiftParser/Lexer/LexemeSequence.swift

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,20 @@ extension Lexer {
8181
return remainingText
8282
}
8383
}
84+
85+
#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
86+
/// If `pointer` is in the source buffer of this `LexemeSequence`, return
/// its offset, otherwise `nil`. Should only be used for the parser's
/// alternate token introspection.
///
/// An offset equal to the buffer's length (one past the last byte) is
/// accepted. Pointers that lie before the start of the buffer, and any
/// pointer when the buffer has no base address, yield `nil`.
func offset(of pointer: UnsafePointer<UInt8>) -> Int? {
  let input = self.sourceBufferStart.input
  guard let bufferStart = input.baseAddress else {
    // Without a base address there is no buffer the pointer could be in.
    return nil
  }
  let offset = pointer - bufferStart
  // Check both bounds: a pointer in front of the buffer produces a negative
  // distance, which must not be reported as a valid offset.
  guard offset >= 0, offset <= input.count else {
    return nil
  }
  return offset
}
97+
#endif
8498
}
8599

86100
@_spi(RawSyntax)

Sources/SwiftParser/Lookahead.swift

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,12 @@ extension Parser.Lookahead: TokenConsumer {
7070
mutating func eat(_ spec: TokenSpec) -> Token {
7171
return self.consume(if: spec)!
7272
}
73+
74+
#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
75+
/// A lookahead never records alternative token choices.
var shouldRecordAlternativeTokenChoices: Bool {
  return false
}

/// Intentionally empty: a lookahead has nothing to record.
public mutating func recordAlternativeTokenChoice(for lexeme: Lexer.Lexeme, choices: [TokenSpec]) {
}
78+
#endif
7379
}
7480

7581
extension Parser.Lookahead {

Sources/SwiftParser/Parser.swift

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,34 @@ public struct Parser {
207207
break
208208
}
209209
}
210+
211+
#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
212+
/// Whether the parser should record the `TokenSpec`s it checks for.
/// Off by default; switched on by `enableAlternativeTokenChoices()`.
var shouldRecordAlternativeTokenChoices: Bool = false

/// Turns on recording of alternative token choices for this parser.
public mutating func enableAlternativeTokenChoices() {
  self.shouldRecordAlternativeTokenChoices = true
}

/// The `TokenSpec`s the parser checked for, keyed by source offset.
///
/// Only populated when the parser is compiled with
/// `SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION` and
/// `shouldRecordAlternativeTokenChoices` is `true`. Each key is the offset of
/// the start of a token's text (leading trivia excluded) in the source file.
///
/// Testing techniques can use this to replace tokens with the recorded
/// alternatives and thereby generate new, interesting test cases.
@_spi(RawSyntax)
public var alternativeTokenChoices: [Int: [TokenSpec]] = [:]

/// Adds `choices` to the specs recorded for the offset at which `lexeme`'s
/// token text starts. Does nothing if the token text has no base address or
/// `lexemes.offset(of:)` returns `nil` for it.
mutating func recordAlternativeTokenChoice(for lexeme: Lexer.Lexeme, choices: [TokenSpec]) {
  guard let tokenStart = lexeme.tokenText.baseAddress else {
    return
  }
  guard let sourceOffset = lexemes.offset(of: tokenStart) else {
    return
  }
  alternativeTokenChoices[sourceOffset, default: []] += choices
}
237+
#endif
210238
}
211239

212240
// MARK: Inspecting Tokens

Sources/SwiftParser/Recovery.swift

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,11 @@ extension Parser.Lookahead {
6767
_ spec2: TokenSpec,
6868
_ spec3: TokenSpec
6969
) -> RecoveryConsumptionHandle? {
70+
#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
71+
if shouldRecordAlternativeTokenChoices {
72+
recordAlternativeTokenChoice(for: self.currentToken, choices: [spec1, spec2, spec3])
73+
}
74+
#endif
7075
let initialTokensConsumed = self.tokensConsumed
7176

7277
let recoveryPrecedence = min(spec1.recoveryPrecedence, spec2.recoveryPrecedence, spec3.recoveryPrecedence)
@@ -119,6 +124,11 @@ extension Parser.Lookahead {
119124
anyIn specSet: SpecSet.Type,
120125
overrideRecoveryPrecedence: TokenPrecedence? = nil
121126
) -> (match: SpecSet, handle: RecoveryConsumptionHandle)? {
127+
#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
128+
if shouldRecordAlternativeTokenChoices {
129+
recordAlternativeTokenChoice(for: self.currentToken, choices: specSet.allCases.map(\.spec))
130+
}
131+
#endif
122132
let initialTokensConsumed = self.tokensConsumed
123133

124134
precondition(!specSet.allCases.isEmpty, "SpecSet must have at least one case")

Sources/SwiftParser/TokenConsumer.swift

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,7 @@
1313
@_spi(RawSyntax) import SwiftSyntax
1414

1515
/// A type that consumes instances of `TokenSyntax`.
16-
@_spi(RawSyntax)
17-
public protocol TokenConsumer {
16+
protocol TokenConsumer {
1817
associatedtype Token
1918
/// The current token syntax being examined by the consumer
2019
var currentToken: Lexer.Lexeme { get }
@@ -32,6 +31,21 @@ public protocol TokenConsumer {
3231
func peek() -> Lexer.Lexeme
3332

3433
func lookahead() -> Parser.Lookahead
34+
35+
#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
36+
var shouldRecordAlternativeTokenChoices: Bool { get }
37+
38+
/// When compiled with `SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION`,
39+
/// record alternative tokens that the parser was looking for at the offset of
40+
/// `lexeme`.
41+
///
42+
/// E.g. if at offset 33, we issue an `at(.leftParen)` call, this will record
43+
/// that `.leftParen` is an interesting token at offset 33. This allows the
44+
/// test case mutators to prefer replacing the current token at offset 33 by a
45+
/// left paren, because apparently this would be a code path that the parser
46+
/// is interested in.
47+
mutating func recordAlternativeTokenChoice(for lexeme: Lexer.Lexeme, choices: [TokenSpec])
48+
#endif
3549
}
3650

3751
// MARK: Checking if we are at one specific token (`at`)
@@ -51,6 +65,11 @@ extension TokenConsumer {
5165
/// Returns whether the current token matches `spec`
5266
@inline(__always)
5367
mutating func at(_ spec: TokenSpec) -> Bool {
68+
#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
69+
if shouldRecordAlternativeTokenChoices {
70+
recordAlternativeTokenChoice(for: self.currentToken, choices: [spec])
71+
}
72+
#endif
5473
return spec ~= self.currentToken
5574
}
5675

@@ -60,6 +79,11 @@ extension TokenConsumer {
6079
_ spec1: TokenSpec,
6180
_ spec2: TokenSpec
6281
) -> Bool {
82+
#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
83+
if shouldRecordAlternativeTokenChoices {
84+
recordAlternativeTokenChoice(for: self.currentToken, choices: [spec1, spec2])
85+
}
86+
#endif
6387
switch self.currentToken {
6488
case spec1: return true
6589
case spec2: return true
@@ -74,6 +98,11 @@ extension TokenConsumer {
7498
_ spec2: TokenSpec,
7599
_ spec3: TokenSpec
76100
) -> Bool {
101+
#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
102+
if shouldRecordAlternativeTokenChoices {
103+
recordAlternativeTokenChoice(for: self.currentToken, choices: [spec1, spec2, spec3])
104+
}
105+
#endif
77106
switch self.currentToken {
78107
case spec1: return true
79108
case spec2: return true
@@ -93,6 +122,11 @@ extension TokenConsumer {
93122
/// as well as a handle to consume that token.
94123
@inline(__always)
95124
mutating func at<SpecSet: TokenSpecSet>(anyIn specSet: SpecSet.Type) -> (SpecSet, TokenConsumptionHandle)? {
125+
#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
126+
if shouldRecordAlternativeTokenChoices {
127+
recordAlternativeTokenChoice(for: self.currentToken, choices: specSet.allCases.map(\.spec))
128+
}
129+
#endif
96130
if let matchedKind = SpecSet(lexeme: self.currentToken) {
97131
precondition(matchedKind.spec ~= self.currentToken)
98132
return (

Sources/SwiftParser/TokenSpec.swift

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ struct PrepareForKeywordMatch {
4444
/// marked `@inline(__always)` so the compiler inlines the `RawTokenKind` we are
4545
/// matching against and is thus able to rule out one of the branches in
4646
/// `matches(rawTokenKind:text:)` based on the matched kind.
47-
struct TokenSpec {
47+
@_spi(RawSyntax)
48+
public struct TokenSpec {
4849
/// The kind we expect the token that we want to consume to have.
4950
/// This can be a keyword, in which case the `TokenSpec` will also match an
5051
/// identifier with the same text as the keyword and remap it to that keyword
@@ -160,6 +161,29 @@ struct TokenSpec {
160161
atStartOfLine: lexeme.isAtStartOfLine
161162
)
162163
}
164+
165+
/// A `TokenKind` that will most likely be parsed as a token matching this
/// `TokenSpec`.
///
/// Kinds that carry text get a fixed placeholder text, keywords use this
/// spec's `keyword`, and every other kind is built with
/// `TokenKind.fromRaw(kind:text:)` using empty text.
///
/// IMPORTANT: Only meant for generating tokens while modifying test cases.
/// This should never be used in the parser itself.
public var synthesizedTokenKind: TokenKind {
  let synthesized: TokenKind
  switch rawTokenKind {
  case .binaryOperator:
    synthesized = .binaryOperator("+")
  case .dollarIdentifier:
    synthesized = .dollarIdentifier("$0")
  case .extendedRegexDelimiter:
    synthesized = .extendedRegexDelimiter("#")
  case .floatingLiteral:
    synthesized = .floatingLiteral("1.0")
  case .identifier:
    synthesized = .identifier("myIdent")
  case .integerLiteral:
    synthesized = .integerLiteral("1")
  case .keyword:
    synthesized = .keyword(keyword!)
  case .postfixOperator:
    synthesized = .postfixOperator("++")
  case .prefixOperator:
    synthesized = .prefixOperator("!")
  case .rawStringDelimiter:
    synthesized = .rawStringDelimiter("#")
  case .regexLiteralPattern:
    synthesized = .regexLiteralPattern(".*")
  case .stringSegment:
    synthesized = .stringSegment("abc")
  default:
    synthesized = TokenKind.fromRaw(kind: rawTokenKind, text: "")
  }
  return synthesized
}
163187
}
164188

165189
extension TokenConsumer {

Tests/SwiftParserTest/Assertions.swift

Lines changed: 77 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,39 @@ func assertDiagnostic<T: SyntaxProtocol>(
446446
}
447447
}
448448

449+
/// Prints a syntax tree to bytes, substituting selected tokens with tokens
/// synthesized from a `TokenSpec`.
class MutatedTreePrinter: SyntaxVisitor {
  /// Replacement specs, keyed by the UTF-8 offset at which the token to
  /// replace starts (after its leading trivia).
  private let mutations: [Int: TokenSpec]

  /// The bytes emitted so far, in visitation order.
  private var printedSource: [UInt8] = []

  /// Prints `tree`, replacing every token whose offset is a key of
  /// `mutations` by a token that matches the corresponding `TokenSpec`.
  static func print(tree: Syntax, mutations: [Int: TokenSpec]) -> [UInt8] {
    let printer = MutatedTreePrinter(mutations: mutations)
    printer.walk(tree)
    return printer.printedSource
  }

  private init(mutations: [Int: TokenSpec]) {
    self.mutations = mutations
    super.init(viewMode: .sourceAccurate)
  }

  /// Emits either `node` itself or, if a mutation is registered for its
  /// start offset, a synthesized token that keeps `node`'s trivia.
  override func visit(_ node: TokenSyntax) -> SyntaxVisitorContinueKind {
    let tokenStart = node.positionAfterSkippingLeadingTrivia.utf8Offset
    let emitted: TokenSyntax
    if let replacementSpec = mutations[tokenStart] {
      emitted = TokenSyntax(
        replacementSpec.synthesizedTokenKind,
        leadingTrivia: node.leadingTrivia,
        trailingTrivia: node.trailingTrivia,
        presence: .present
      )
    } else {
      emitted = node
    }
    printedSource.append(contentsOf: emitted.syntaxTextBytes)
    return .skipChildren
  }
}
481+
449482
public struct AssertParseOptions: OptionSet {
450483
public var rawValue: UInt8
451484

@@ -489,38 +522,6 @@ func assertParse(
489522
)
490523
}
491524

492-
/// Same as `assertParse` overload with a `(String) -> S` `parse`,
493-
/// constructing a `Parser` from the given `String` and passing that to
494-
/// `parse` instead.
495-
func assertParse<S: SyntaxProtocol>(
496-
_ markedSource: String,
497-
_ parse: (inout Parser) -> S,
498-
substructure expectedSubstructure: Syntax? = nil,
499-
substructureAfterMarker: String = "START",
500-
diagnostics expectedDiagnostics: [DiagnosticSpec] = [],
501-
applyFixIts: [String]? = nil,
502-
fixedSource expectedFixedSource: String? = nil,
503-
options: AssertParseOptions = [],
504-
file: StaticString = #file,
505-
line: UInt = #line
506-
) {
507-
assertParse(
508-
markedSource,
509-
{ (source: String) -> S in
510-
var parser = Parser(source)
511-
return parse(&parser)
512-
},
513-
substructure: expectedSubstructure,
514-
substructureAfterMarker: substructureAfterMarker,
515-
diagnostics: expectedDiagnostics,
516-
applyFixIts: applyFixIts,
517-
fixedSource: expectedFixedSource,
518-
options: options,
519-
file: file,
520-
line: line
521-
)
522-
}
523-
524525
/// Removes any test markers from `markedSource` (1) and parses the result
525526
/// using `parse`. By default it only checks if the parsed syntax tree is
526527
/// printable back to the original source, i.e. it round-trips.
@@ -541,7 +542,7 @@ func assertParse<S: SyntaxProtocol>(
541542
/// this string.
542543
func assertParse<S: SyntaxProtocol>(
543544
_ markedSource: String,
544-
_ parse: (String) -> S,
545+
_ parse: (inout Parser) -> S,
545546
substructure expectedSubstructure: Syntax? = nil,
546547
substructureAfterMarker: String = "START",
547548
diagnostics expectedDiagnostics: [DiagnosticSpec] = [],
@@ -555,7 +556,15 @@ func assertParse<S: SyntaxProtocol>(
555556
var (markerLocations, source) = extractMarkers(markedSource)
556557
markerLocations["START"] = 0
557558

558-
let tree: S = parse(source)
559+
var parser = Parser(source)
560+
#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
561+
let enableTestCaseMutation = ProcessInfo.processInfo.environment["SKIP_LONG_TESTS"] != "1"
562+
563+
if enableTestCaseMutation {
564+
parser.enableAlternativeTokenChoices()
565+
}
566+
#endif
567+
let tree: S = parse(&parser)
559568

560569
// Round-trip
561570
assertStringsEqualWithDiff(
@@ -615,4 +624,38 @@ func assertParse<S: SyntaxProtocol>(
615624
line: line
616625
)
617626
}
627+
628+
#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
629+
if enableTestCaseMutation {
630+
let mutations: [(offset: Int, replacement: TokenSpec)] = parser.alternativeTokenChoices.flatMap { offset, replacements in
631+
return replacements.map { (offset, $0) }
632+
}
633+
DispatchQueue.concurrentPerform(iterations: mutations.count) { index in
634+
let mutation = mutations[index]
635+
let alternateSource = MutatedTreePrinter.print(tree: Syntax(tree), mutations: [mutation.offset: mutation.replacement])
636+
alternateSource.withUnsafeBufferPointer { buf in
637+
let mutatedSource = String(decoding: buf, as: UTF8.self)
638+
// Check that we don't hit any assertions in the parser while parsing
639+
// the mutated source and that it round-trips
640+
var mutatedParser = Parser(buf)
641+
let mutatedTree = parse(&mutatedParser)
642+
assertStringsEqualWithDiff(
643+
"\(mutatedTree)",
644+
mutatedSource,
645+
additionalInfo: """
646+
Mutated source failed to round-trip.
647+
648+
Mutated source:
649+
\(mutatedSource)
650+
651+
Actual syntax tree:
652+
\(mutatedTree.debugDescription)
653+
""",
654+
file: file,
655+
line: line
656+
)
657+
}
658+
}
659+
}
660+
#endif
618661
}

0 commit comments

Comments
 (0)