Skip to content

Commit cf26631

Browse files
committed
Enable a mode in the parser in which it inspects alternative token choices to mutate test cases
The basic idea is that the parser records which `TokenSpec`s it checked for at individual offsets in the source. We can then use that information to generate new, interesting test cases by replacing the token at such an offset with a token that matches one of the `TokenSpec`s the parser checked for there. This technique has already found 11 bugs in the parser, and I’m expecting it to find quite a few more once we assert that tokens have one of the expected kinds. Gathering this information is hidden behind a conditional compilation flag because merely checking whether we want to record alternative token choices incurs a 6% performance regression, and that cost buys nothing except when we are running SwiftParserTest.
1 parent 02da1a1 commit cf26631

File tree

9 files changed

+211
-47
lines changed

9 files changed

+211
-47
lines changed

Sources/SwiftParser/Lexer/LexemeSequence.swift

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,20 @@ extension Lexer {
9494
return remainingText
9595
}
9696
}
97+
98+
#if PARSER_ALTERNATE_TOKEN_INTROSPECTION
99+
/// If `pointer` is in the source buffer of this `LexemeSequence`, return
/// its offset from the start of that buffer, otherwise `nil`. Should only be
/// used for the parser's alternate token introspection.
///
/// The address one past the end of the buffer is accepted and yields an
/// offset equal to the buffer's `count`, matching the upper bound of the
/// original check.
///
/// - Parameter pointer: The address to locate relative to the source buffer.
/// - Returns: The offset of `pointer` in `0...count`, or `nil` if `pointer`
///   lies before or after the buffer, or the buffer has no base address.
func offset(of pointer: UnsafePointer<UInt8>) -> Int? {
  let input = self.sourceBufferStart.input
  guard let bufferStart = input.baseAddress else {
    // A buffer without a base address cannot contain any pointer, so report
    // "not found" instead of trapping on a force-unwrap.
    return nil
  }
  let offset = pointer - bufferStart
  // Bound the offset on both sides: a pointer that precedes the buffer
  // produces a negative distance, which must not be reported as a valid
  // offset.
  guard (0...input.count).contains(offset) else {
    return nil
  }
  return offset
}
110+
#endif
97111
}
98112

99113
@_spi(RawSyntax)

Sources/SwiftParser/Lookahead.swift

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,12 @@ extension Parser.Lookahead: TokenConsumer {
7070
mutating func eat(_ spec: TokenSpec) -> Token {
7171
return self.consume(if: spec)!
7272
}
73+
74+
#if PARSER_ALTERNATE_TOKEN_INTROSPECTION
75+
/// Always `false` for this conformance, so call sites guarded by
/// `if shouldRecordAlternativeTokenChoices` never record anything.
var shouldRecordAlternativeTokenChoices: Bool { false }
76+
77+
/// Intentionally empty: this conformance discards `choices` and records
/// nothing for `lexeme`.
mutating public func recordAlternativeTokenChoice(for lexeme: Lexer.Lexeme, choices: [TokenSpec]) {}
78+
#endif
7379
}
7480

7581
extension Parser.Lookahead {

Sources/SwiftParser/Parser.swift

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,34 @@ public struct Parser {
207207
break
208208
}
209209
}
210+
211+
#if PARSER_ALTERNATE_TOKEN_INTROSPECTION
212+
var shouldRecordAlternativeTokenChoices: Bool = false
213+
214+
/// Turns on recording of alternative token choices by setting
/// `shouldRecordAlternativeTokenChoices` to `true`. Only available when
/// compiled with `PARSER_ALTERNATE_TOKEN_INTROSPECTION`.
public mutating func enableAlternativeTokenChoices() {
215+
shouldRecordAlternativeTokenChoices = true
216+
}
217+
218+
/// When compiled with `PARSER_ALTERNATE_TOKEN_INTROSPECTION`, and
219+
/// `shouldRecordAlternativeTokenChoices` is `true` the parser records which
220+
/// `TokenSpec`s it checked for a token at a specific offset in the source
221+
/// file. The offsets are the location of the token text's start (excluding
222+
/// leading trivia).
223+
///
224+
/// This information allows testing techniques to replace tokens by these
225+
/// alternate token choices to generate new, interesting test cases
226+
@_spi(RawSyntax)
227+
public var alternativeTokenChoices: [Int: [TokenSpec]] = [:]
228+
229+
/// Remembers `choices` as token specs the parser looked for at the position
/// of `lexeme`.
///
/// The entry is keyed by the offset of the lexeme's token text in the source
/// buffer of `lexemes`. Nothing is recorded when the token text has no base
/// address or when `lexemes.offset(of:)` cannot map it to an offset.
mutating func recordAlternativeTokenChoice(for lexeme: Lexer.Lexeme, choices: [TokenSpec]) {
  if let textStart = lexeme.tokenText.baseAddress,
    let sourceOffset = lexemes.offset(of: textStart)
  {
    // Accumulate rather than overwrite: several checks may be issued for the
    // same offset.
    alternativeTokenChoices[sourceOffset, default: []] += choices
  }
}
237+
#endif
210238
}
211239

212240
// MARK: Inspecting Tokens

Sources/SwiftParser/Recovery.swift

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,11 @@ extension Parser.Lookahead {
6767
_ spec2: TokenSpec,
6868
_ spec3: TokenSpec
6969
) -> RecoveryConsumptionHandle? {
70+
#if PARSER_ALTERNATE_TOKEN_INTROSPECTION
71+
if shouldRecordAlternativeTokenChoices {
72+
recordAlternativeTokenChoice(for: self.currentToken, choices: [spec1, spec2, spec3])
73+
}
74+
#endif
7075
let initialTokensConsumed = self.tokensConsumed
7176

7277
let recoveryPrecedence = min(spec1.recoveryPrecedence, spec2.recoveryPrecedence, spec3.recoveryPrecedence)
@@ -118,6 +123,11 @@ extension Parser.Lookahead {
118123
anyIn specSet: SpecSet.Type,
119124
overrideRecoveryPrecedence: TokenPrecedence? = nil
120125
) -> (match: SpecSet, handle: RecoveryConsumptionHandle)? {
126+
#if PARSER_ALTERNATE_TOKEN_INTROSPECTION
127+
if shouldRecordAlternativeTokenChoices {
128+
recordAlternativeTokenChoice(for: self.currentToken, choices: specSet.allCases.map(\.spec))
129+
}
130+
#endif
121131
let initialTokensConsumed = self.tokensConsumed
122132

123133
assert(!specSet.allCases.isEmpty, "SpecSet must have at least one case")

Sources/SwiftParser/TokenConsumer.swift

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,7 @@
1313
@_spi(RawSyntax) import SwiftSyntax
1414

1515
/// A type that consumes instances of `TokenSyntax`.
16-
@_spi(RawSyntax)
17-
public protocol TokenConsumer {
16+
protocol TokenConsumer {
1817
associatedtype Token
1918
/// The current token syntax being examined by the consumer
2019
var currentToken: Lexer.Lexeme { get }
@@ -32,6 +31,21 @@ public protocol TokenConsumer {
3231
func peek() -> Lexer.Lexeme
3332

3433
func lookahead() -> Parser.Lookahead
34+
35+
#if PARSER_ALTERNATE_TOKEN_INTROSPECTION
36+
var shouldRecordAlternativeTokenChoices: Bool { get }
37+
38+
/// When compiled with `PARSER_ALTERNATE_TOKEN_INTROSPECTION`, record
39+
/// alternative tokens that the parser was looking for at the offset of
40+
/// `lexeme`.
41+
///
42+
/// E.g. if at offset 33, we issue an `at(.leftParen)` call, this will record
43+
/// that `.leftParen` is an interesting token at offset 33. This allows the
44+
/// test case mutators to prefer replacing the current token at offset 33 by a
45+
/// left paren, because apparently this would be a code path that the parser
46+
/// is interested in.
47+
mutating func recordAlternativeTokenChoice(for lexeme: Lexer.Lexeme, choices: [TokenSpec])
48+
#endif
3549
}
3650

3751
// MARK: Checking if we are at one specific token (`at`)
@@ -51,6 +65,11 @@ extension TokenConsumer {
5165
/// Returns whether the current token matches `spec`.
///
/// When compiled with `PARSER_ALTERNATE_TOKEN_INTROSPECTION` and
/// `shouldRecordAlternativeTokenChoices` is `true`, `spec` is additionally
/// recorded as an alternative token choice for the current token before the
/// match is evaluated.
5266
@inline(__always)
5367
mutating func at(_ spec: TokenSpec) -> Bool {
68+
#if PARSER_ALTERNATE_TOKEN_INTROSPECTION
69+
if shouldRecordAlternativeTokenChoices {
70+
recordAlternativeTokenChoice(for: self.currentToken, choices: [spec])
71+
}
72+
#endif
5473
return spec ~= self.currentToken
5574
}
5675

@@ -60,6 +79,11 @@ extension TokenConsumer {
6079
_ spec1: TokenSpec,
6180
_ spec2: TokenSpec
6281
) -> Bool {
82+
#if PARSER_ALTERNATE_TOKEN_INTROSPECTION
83+
if shouldRecordAlternativeTokenChoices {
84+
recordAlternativeTokenChoice(for: self.currentToken, choices: [spec1, spec2])
85+
}
86+
#endif
6387
switch self.currentToken {
6488
case spec1: return true
6589
case spec2: return true
@@ -74,6 +98,11 @@ extension TokenConsumer {
7498
_ spec2: TokenSpec,
7599
_ spec3: TokenSpec
76100
) -> Bool {
101+
#if PARSER_ALTERNATE_TOKEN_INTROSPECTION
102+
if shouldRecordAlternativeTokenChoices {
103+
recordAlternativeTokenChoice(for: self.currentToken, choices: [spec1, spec2, spec3])
104+
}
105+
#endif
77106
switch self.currentToken {
78107
case spec1: return true
79108
case spec2: return true
@@ -93,6 +122,11 @@ extension TokenConsumer {
93122
/// as well as a handle to consume that token.
94123
@inline(__always)
95124
mutating func at<SpecSet: TokenSpecSet>(anyIn specSet: SpecSet.Type) -> (SpecSet, TokenConsumptionHandle)? {
125+
#if PARSER_ALTERNATE_TOKEN_INTROSPECTION
126+
if shouldRecordAlternativeTokenChoices {
127+
recordAlternativeTokenChoice(for: self.currentToken, choices: specSet.allCases.map(\.spec))
128+
}
129+
#endif
96130
if let matchedKind = SpecSet(lexeme: self.currentToken) {
97131
assert(matchedKind.spec ~= self.currentToken)
98132
return (

Sources/SwiftParser/TokenSpec.swift

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ struct PrepareForKeywordMatch {
4444
/// marked `@inline(__always)` so the compiler inlines the `RawTokenKind` we are
4545
/// matching against and is thus able to rule out one of the branches in
4646
/// `matches(rawTokenKind:text:)` based on the matched kind.
47-
struct TokenSpec {
47+
@_spi(RawSyntax)
48+
public struct TokenSpec {
4849
/// The kind we expect the token that we want to consume to have.
4950
/// This can be a keyword, in which case the `TokenSpec` will also match an
5051
/// identifier with the same text as the keyword and remap it to that keyword
@@ -160,6 +161,28 @@ struct TokenSpec {
160161
atStartOfLine: lexeme.isAtStartOfLine
161162
)
162163
}
164+
165+
/// A `TokenKind` that, once printed and parsed again, will most likely
/// produce a token matching this `TokenSpec`.
///
/// Kinds that carry text get a fixed placeholder text; every other kind is
/// built from `rawTokenKind` with empty text.
///
/// IMPORTANT: Should only be used when generating tokens during the
/// modification of test cases. This should never be used in the parser itself.
public var synthesizedTokenKind: TokenKind {
  let synthesized: TokenKind
  switch rawTokenKind {
  case .keyword:
    // NOTE(review): presumably a spec whose raw kind is `.keyword` always
    // carries a keyword; the force-unwrap traps otherwise.
    synthesized = .keyword(keyword!)
  case .identifier:
    synthesized = .identifier("myIdent")
  case .dollarIdentifier:
    synthesized = .dollarIdentifier("$0")
  case .integerLiteral:
    synthesized = .integerLiteral("1")
  case .floatingLiteral:
    synthesized = .floatingLiteral("1.0")
  case .regexLiteral:
    synthesized = .regexLiteral("/.*/")
  case .stringSegment:
    synthesized = .stringSegment("abc")
  case .rawStringDelimiter:
    synthesized = .rawStringDelimiter("#")
  case .binaryOperator:
    synthesized = .binaryOperator("+")
  case .prefixOperator:
    synthesized = .prefixOperator("!")
  case .postfixOperator:
    synthesized = .postfixOperator("++")
  default:
    synthesized = TokenKind.fromRaw(kind: rawTokenKind, text: "")
  }
  return synthesized
}
163186
}
164187

165188
extension TokenConsumer {

Sources/SwiftParser/generated/TokenSpecStaticMembers.swift

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@
1515

1616
import SwiftSyntax
1717

18-
extension TokenSpec {
18+
@_spi(RawSyntax)
19+
public extension TokenSpec {
1920
static var eof: TokenSpec {
2021
return TokenSpec(.eof)
2122
}

Tests/SwiftParserTest/Assertions.swift

Lines changed: 80 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,39 @@ func AssertDiagnostic<T: SyntaxProtocol>(
446446
}
447447
}
448448

449+
/// A syntax visitor that renders a tree to bytes while swapping selected
/// tokens for synthesized replacements.
class MutatedTreePrinter: SyntaxVisitor {
  /// Replacement specs, keyed by the UTF-8 offset at which the token to
  /// replace starts once its leading trivia is skipped.
  private let mutations: [Int: TokenSpec]

  /// The bytes emitted so far, in the order tokens were visited.
  private var printedSource: [UInt8] = []

  private init(mutations: [Int: TokenSpec]) {
    self.mutations = mutations
    super.init(viewMode: .sourceAccurate)
  }

  /// Prints `tree` by replacing the tokens whose offset is in `mutations` by
  /// a token that matches the corresponding `TokenSpec`.
  ///
  /// - Returns: The bytes of the printed, mutated source.
  static func print(tree: Syntax, mutations: [Int: TokenSpec]) -> [UInt8] {
    let visitor = MutatedTreePrinter(mutations: mutations)
    visitor.walk(tree)
    return visitor.printedSource
  }

  /// Emits either `node` itself or, if a mutation is registered for its
  /// offset, a synthesized token that keeps `node`'s leading and trailing
  /// trivia.
  override func visit(_ node: TokenSyntax) -> SyntaxVisitorContinueKind {
    let textStart = node.positionAfterSkippingLeadingTrivia.utf8Offset
    let emitted: TokenSyntax
    if let replacement = mutations[textStart] {
      emitted = TokenSyntax(
        replacement.synthesizedTokenKind,
        leadingTrivia: node.leadingTrivia,
        trailingTrivia: node.trailingTrivia,
        presence: .present
      )
    } else {
      emitted = node
    }
    printedSource.append(contentsOf: emitted.syntaxTextBytes)
    return .skipChildren
  }
}
481+
449482
public struct AssertParseOptions: OptionSet {
450483
public var rawValue: UInt8
451484

@@ -489,38 +522,6 @@ func AssertParse(
489522
)
490523
}
491524

492-
/// Same as `AssertParse` overload with a `(String) -> S` `parse`,
493-
/// constructing a `Parser` from the given `String` and passing that to
494-
/// `parse` instead.
495-
func AssertParse<S: SyntaxProtocol>(
496-
_ markedSource: String,
497-
_ parse: (inout Parser) -> S,
498-
substructure expectedSubstructure: Syntax? = nil,
499-
substructureAfterMarker: String = "START",
500-
diagnostics expectedDiagnostics: [DiagnosticSpec] = [],
501-
applyFixIts: [String]? = nil,
502-
fixedSource expectedFixedSource: String? = nil,
503-
options: AssertParseOptions = [],
504-
file: StaticString = #file,
505-
line: UInt = #line
506-
) {
507-
AssertParse(
508-
markedSource,
509-
{ (source: String) -> S in
510-
var parser = Parser(source)
511-
return parse(&parser)
512-
},
513-
substructure: expectedSubstructure,
514-
substructureAfterMarker: substructureAfterMarker,
515-
diagnostics: expectedDiagnostics,
516-
applyFixIts: applyFixIts,
517-
fixedSource: expectedFixedSource,
518-
options: options,
519-
file: file,
520-
line: line
521-
)
522-
}
523-
524525
/// Removes any test markers from `markedSource` (1) and parses the result
525526
/// using `parse`. By default it only checks if the parsed syntax tree is
526527
/// printable back to the origin source, ie. it round trips.
@@ -541,7 +542,7 @@ func AssertParse<S: SyntaxProtocol>(
541542
/// this string.
542543
func AssertParse<S: SyntaxProtocol>(
543544
_ markedSource: String,
544-
_ parse: (String) -> S,
545+
_ parse: (inout Parser) -> S,
545546
substructure expectedSubstructure: Syntax? = nil,
546547
substructureAfterMarker: String = "START",
547548
diagnostics expectedDiagnostics: [DiagnosticSpec] = [],
@@ -555,7 +556,18 @@ func AssertParse<S: SyntaxProtocol>(
555556
var (markerLocations, source) = extractMarkers(markedSource)
556557
markerLocations["START"] = 0
557558

558-
let tree: S = parse(source)
559+
let enableTestCaseMutation = false
560+
// FIXME: Currently, tests are failing when we enable test case mutation.
561+
// Once all of those issues are fixed, this should become
562+
// let enableTestCaseMutation = ProcessInfo.processInfo.environment["SKIP_LONG_TESTS"] != "1"
563+
564+
var parser = Parser(source)
565+
#if PARSER_ALTERNATE_TOKEN_INTROSPECTION
566+
if enableTestCaseMutation {
567+
parser.enableAlternativeTokenChoices()
568+
}
569+
#endif
570+
let tree: S = parse(&parser)
559571

560572
// Round-trip
561573
AssertStringsEqualWithDiff(
@@ -615,4 +627,38 @@ func AssertParse<S: SyntaxProtocol>(
615627
line: line
616628
)
617629
}
630+
631+
#if PARSER_ALTERNATE_TOKEN_INTROSPECTION
632+
if enableTestCaseMutation {
633+
let mutations: [(offset: Int, replacement: TokenSpec)] = parser.alternativeTokenChoices.flatMap { offset, replacements in
634+
return replacements.map { (offset, $0) }
635+
}
636+
DispatchQueue.concurrentPerform(iterations: mutations.count) { index in
637+
let mutation = mutations[index]
638+
let alternateSource = MutatedTreePrinter.print(tree: Syntax(tree), mutations: [mutation.offset: mutation.replacement])
639+
alternateSource.withUnsafeBufferPointer { buf in
640+
let mutatedSource = String(decoding: buf, as: UTF8.self)
641+
// Check that we don't hit any assertions in the parser while parsing
642+
// the mutated source and that it round-trips
643+
var mutatedParser = Parser(buf)
644+
let mutatedTree = parse(&mutatedParser)
645+
AssertStringsEqualWithDiff(
646+
"\(mutatedTree)",
647+
mutatedSource,
648+
additionalInfo: """
649+
Mutated source failed to round-trip.
650+
651+
Mutated source:
652+
\(mutatedSource)
653+
654+
Actual syntax tree:
655+
\(mutatedTree.recursiveDescription)
656+
""",
657+
file: file,
658+
line: line
659+
)
660+
}
661+
}
662+
}
663+
#endif
618664
}

Tests/SwiftParserTest/Parser+EntryTests.swift

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,18 +16,20 @@ import XCTest
1616

1717
public class EntryTests: XCTestCase {
1818
func testTopLevelStringParse() throws {
19-
AssertParse("func test() {}", { Parser.parse(source: $0) })
19+
let source = "func test() {}"
20+
let tree = Parser.parse(source: source)
21+
XCTAssert(tree.is(SourceFileSyntax.self))
22+
XCTAssert(!tree.hasError)
23+
XCTAssertEqual(tree.description, source)
2024
}
2125

2226
func testTopLevelBufferParse() throws {
23-
AssertParse(
24-
"func test() {}",
25-
{ (source: String) -> SourceFileSyntax in
26-
var source = source
27-
source.makeContiguousUTF8()
28-
return source.withUTF8 { Parser.parse(source: $0) }
29-
}
30-
)
27+
var source = "func test() {}"
28+
source.makeContiguousUTF8()
29+
let tree = source.withUTF8 { Parser.parse(source: $0) }
30+
XCTAssert(tree.is(SourceFileSyntax.self))
31+
XCTAssert(!tree.hasError)
32+
XCTAssertEqual(tree.description, source)
3133
}
3234

3335
func testSyntaxParse() throws {

0 commit comments

Comments
 (0)