Skip to content

Commit 011d9c1

Browse files
authored
Merge pull request #1340 from ahoppen/ahoppen/source-alteration
Enable a mode in the parser in which it inspects alternative token choices to mutate test cases
2 parents 06720d7 + b32317c commit 011d9c1

10 files changed

+258
-55
lines changed

Package.swift

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,13 @@ if ProcessInfo.processInfo.environment["SWIFTSYNTAX_ENABLE_RAWSYNTAX_VALIDATION"
2828
]
2929
}
3030

31+
var swiftParserSwiftSettings: [SwiftSetting] = []
32+
if ProcessInfo.processInfo.environment["SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION"] != nil {
33+
swiftParserSwiftSettings += [
34+
.define("SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION")
35+
]
36+
}
37+
3138
let package = Package(
3239
name: "SwiftSyntax",
3340
platforms: [
@@ -170,12 +177,15 @@ let package = Package(
170177
.target(
171178
name: "SwiftParser",
172179
dependencies: ["SwiftSyntax"],
173-
exclude: ["CMakeLists.txt", "README.md"]
180+
exclude: ["CMakeLists.txt", "README.md"],
181+
swiftSettings: swiftParserSwiftSettings
182+
174183
),
175184

176185
.testTarget(
177186
name: "SwiftParserTest",
178-
dependencies: ["_SwiftSyntaxTestSupport", "SwiftDiagnostics", "SwiftOperators", "SwiftParser", "SwiftSyntaxBuilder"]
187+
dependencies: ["_SwiftSyntaxTestSupport", "SwiftDiagnostics", "SwiftOperators", "SwiftParser", "SwiftSyntaxBuilder"],
188+
swiftSettings: swiftParserSwiftSettings
179189
),
180190

181191
// MARK: SwiftParserDiagnostics

Sources/SwiftParser/Lexer/LexemeSequence.swift

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,20 @@ extension Lexer {
8181
return remainingText
8282
}
8383
}
84+
85+
#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
86+
/// If `pointer` is in the source buffer of this `LexemeSequence`, return
/// its offset from the start of that buffer, otherwise `nil`. Should only be
/// used for the parser's alternate token introspection.
///
/// - Parameter pointer: The address to locate within the source buffer.
/// - Returns: The distance in bytes from the buffer's base address to
///   `pointer`, or `nil` if `pointer` lies before the buffer, past its end,
///   or if the buffer has no base address.
func offset(of pointer: UnsafePointer<UInt8>) -> Int? {
  let sourceBuffer = self.sourceBufferStart.input
  // A buffer without a base address cannot contain any pointer; report that
  // as "not found" instead of trapping on a force-unwrap.
  guard let bufferStart = sourceBuffer.baseAddress else {
    return nil
  }
  let offset = pointer - bufferStart
  // Reject pointers located *before* the buffer (negative offset) as well as
  // those past its end. `offset == count` stays accepted, as before —
  // presumably so a zero-length token at the very end of the buffer can
  // still be located (NOTE(review): confirm against the lexer).
  guard (0...sourceBuffer.count).contains(offset) else {
    return nil
  }
  return offset
}
97+
#endif
8498
}
8599

86100
@_spi(RawSyntax)

Sources/SwiftParser/Lookahead.swift

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,12 @@ extension Parser.Lookahead: TokenConsumer {
7070
mutating func eat(_ spec: TokenSpec) -> Token {
7171
return self.consume(if: spec)!
7272
}
73+
74+
#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
75+
var shouldRecordAlternativeTokenChoices: Bool { false }
76+
77+
mutating public func recordAlternativeTokenChoice(for lexeme: Lexer.Lexeme, choices: [TokenSpec]) {}
78+
#endif
7379
}
7480

7581
extension Parser.Lookahead {

Sources/SwiftParser/Parser.swift

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,34 @@ public struct Parser {
207207
break
208208
}
209209
}
210+
211+
#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
212+
var shouldRecordAlternativeTokenChoices: Bool = false
213+
214+
/// Turns on recording of alternative token choices for this parser by
/// setting `shouldRecordAlternativeTokenChoices` to `true`.
public mutating func enableAlternativeTokenChoices() {
  self.shouldRecordAlternativeTokenChoices = true
}
217+
218+
/// When compiled with `SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION`, and
219+
/// `shouldRecordAlternativeTokenChoices` is `true`, the parser records which
220+
/// `TokenSpec`s it checked for a token at a specific offset in the source
221+
/// file. The offsets are the location of the token text's start (excluding
222+
/// leading trivia).
223+
///
224+
/// This information allows testing techniques to replace tokens by these
225+
/// alternate token choices to generate new, interesting test cases
226+
@_spi(RawSyntax)
227+
public var alternativeTokenChoices: [Int: [TokenSpec]] = [:]
228+
229+
/// Appends `choices` to the alternative token choices stored for the offset
/// at which the text of `lexeme` starts.
///
/// Does nothing if the token text of `lexeme` has no base address or if
/// `lexemes` cannot map that address to an offset.
mutating func recordAlternativeTokenChoice(for lexeme: Lexer.Lexeme, choices: [TokenSpec]) {
  if let textStart = lexeme.tokenText.baseAddress,
    let tokenOffset = lexemes.offset(of: textStart)
  {
    alternativeTokenChoices[tokenOffset, default: []] += choices
  }
}
237+
#endif
210238
}
211239

212240
// MARK: Inspecting Tokens

Sources/SwiftParser/Recovery.swift

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,11 @@ extension Parser.Lookahead {
6767
_ spec2: TokenSpec,
6868
_ spec3: TokenSpec
6969
) -> RecoveryConsumptionHandle? {
70+
#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
71+
if shouldRecordAlternativeTokenChoices {
72+
recordAlternativeTokenChoice(for: self.currentToken, choices: [spec1, spec2, spec3])
73+
}
74+
#endif
7075
let initialTokensConsumed = self.tokensConsumed
7176

7277
let recoveryPrecedence = min(spec1.recoveryPrecedence, spec2.recoveryPrecedence, spec3.recoveryPrecedence)
@@ -119,6 +124,11 @@ extension Parser.Lookahead {
119124
anyIn specSet: SpecSet.Type,
120125
overrideRecoveryPrecedence: TokenPrecedence? = nil
121126
) -> (match: SpecSet, handle: RecoveryConsumptionHandle)? {
127+
#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
128+
if shouldRecordAlternativeTokenChoices {
129+
recordAlternativeTokenChoice(for: self.currentToken, choices: specSet.allCases.map(\.spec))
130+
}
131+
#endif
122132
let initialTokensConsumed = self.tokensConsumed
123133

124134
precondition(!specSet.allCases.isEmpty, "SpecSet must have at least one case")

Sources/SwiftParser/TokenConsumer.swift

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,7 @@
1313
@_spi(RawSyntax) import SwiftSyntax
1414

1515
/// A type that consumes instances of `TokenSyntax`.
16-
@_spi(RawSyntax)
17-
public protocol TokenConsumer {
16+
protocol TokenConsumer {
1817
associatedtype Token
1918
/// The current token syntax being examined by the consumer
2019
var currentToken: Lexer.Lexeme { get }
@@ -32,6 +31,21 @@ public protocol TokenConsumer {
3231
func peek() -> Lexer.Lexeme
3332

3433
func lookahead() -> Parser.Lookahead
34+
35+
#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
36+
var shouldRecordAlternativeTokenChoices: Bool { get }
37+
38+
/// When compiled with `SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION`,
39+
/// record alternative tokens that the parser was looking for at the offset of
40+
/// `lexeme`.
41+
///
42+
/// E.g. if at offset 33, we issue an `at(.leftParen)` call, this will record
43+
/// that `.leftParen` is an interesting token at offset 33. This allows the
44+
/// test case mutators to prefer replacing the current token at offset 33 by a
45+
/// left paren, because apparently this would be a code path that the parser
46+
/// is interested in.
47+
mutating func recordAlternativeTokenChoice(for lexeme: Lexer.Lexeme, choices: [TokenSpec])
48+
#endif
3549
}
3650

3751
// MARK: Checking if we are at one specific token (`at`)
@@ -51,6 +65,11 @@ extension TokenConsumer {
5165
/// Returns whether the current token matches `spec`
5266
@inline(__always)
5367
mutating func at(_ spec: TokenSpec) -> Bool {
68+
#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
69+
if shouldRecordAlternativeTokenChoices {
70+
recordAlternativeTokenChoice(for: self.currentToken, choices: [spec])
71+
}
72+
#endif
5473
return spec ~= self.currentToken
5574
}
5675

@@ -60,6 +79,11 @@ extension TokenConsumer {
6079
_ spec1: TokenSpec,
6180
_ spec2: TokenSpec
6281
) -> Bool {
82+
#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
83+
if shouldRecordAlternativeTokenChoices {
84+
recordAlternativeTokenChoice(for: self.currentToken, choices: [spec1, spec2])
85+
}
86+
#endif
6387
switch self.currentToken {
6488
case spec1: return true
6589
case spec2: return true
@@ -74,6 +98,11 @@ extension TokenConsumer {
7498
_ spec2: TokenSpec,
7599
_ spec3: TokenSpec
76100
) -> Bool {
101+
#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
102+
if shouldRecordAlternativeTokenChoices {
103+
recordAlternativeTokenChoice(for: self.currentToken, choices: [spec1, spec2, spec3])
104+
}
105+
#endif
77106
switch self.currentToken {
78107
case spec1: return true
79108
case spec2: return true
@@ -93,6 +122,11 @@ extension TokenConsumer {
93122
/// as well as a handle to consume that token.
94123
@inline(__always)
95124
mutating func at<SpecSet: TokenSpecSet>(anyIn specSet: SpecSet.Type) -> (SpecSet, TokenConsumptionHandle)? {
125+
#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
126+
if shouldRecordAlternativeTokenChoices {
127+
recordAlternativeTokenChoice(for: self.currentToken, choices: specSet.allCases.map(\.spec))
128+
}
129+
#endif
96130
if let matchedKind = SpecSet(lexeme: self.currentToken) {
97131
precondition(matchedKind.spec ~= self.currentToken)
98132
return (

Sources/SwiftParser/TokenSpec.swift

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ struct PrepareForKeywordMatch {
4444
/// marked `@inline(__always)` so the compiler inlines the `RawTokenKind` we are
4545
/// matching against and is thus able to rule out one of the branches in
4646
/// `matches(rawTokenKind:text:)` based on the matched kind.
47-
struct TokenSpec {
47+
@_spi(RawSyntax)
48+
public struct TokenSpec {
4849
/// The kind we expect the token that we want to consume to have.
4950
/// This can be a keyword, in which case the `TokenSpec` will also match an
5051
/// identifier with the same text as the keyword and remap it to that keyword
@@ -160,6 +161,29 @@ struct TokenSpec {
160161
atStartOfLine: lexeme.isAtStartOfLine
161162
)
162163
}
164+
165+
/// A `TokenKind` that will most likely be parsed as a token matching this
/// `TokenSpec`.
///
/// The kinds listed explicitly below get a fixed sample text; every other
/// kind is created from `rawTokenKind` with an empty text.
///
/// IMPORTANT: Should only be used when generating tokens during the
/// modification of test cases. This should never be used in the parser itself.
public var synthesizedTokenKind: TokenKind {
  switch rawTokenKind {
  case .keyword:
    // NOTE(review): assumes `keyword` is always set when `rawTokenKind` is
    // `.keyword`; the force-unwrap traps otherwise — confirm against the
    // initializers.
    return .keyword(keyword!)
  case .identifier:
    return .identifier("myIdent")
  case .dollarIdentifier:
    return .dollarIdentifier("$0")
  case .integerLiteral:
    return .integerLiteral("1")
  case .floatingLiteral:
    return .floatingLiteral("1.0")
  case .stringSegment:
    return .stringSegment("abc")
  case .rawStringDelimiter:
    return .rawStringDelimiter("#")
  case .regexLiteralPattern:
    return .regexLiteralPattern(".*")
  case .extendedRegexDelimiter:
    return .extendedRegexDelimiter("#")
  case .binaryOperator:
    return .binaryOperator("+")
  case .prefixOperator:
    return .prefixOperator("!")
  case .postfixOperator:
    return .postfixOperator("++")
  default:
    return TokenKind.fromRaw(kind: rawTokenKind, text: "")
  }
}
163187
}
164188

165189
extension TokenConsumer {

Tests/SwiftParserTest/Assertions.swift

Lines changed: 77 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -454,6 +454,39 @@ func assertDiagnostic<T: SyntaxProtocol>(
454454
}
455455
}
456456

457+
/// A `SyntaxVisitor` that prints a syntax tree token by token, swapping out
/// selected tokens for synthesized replacements.
class MutatedTreePrinter: SyntaxVisitor {
  /// Replacement specs, keyed by the UTF-8 offset of a token's position after
  /// skipping its leading trivia.
  private var mutations: [Int: TokenSpec] = [:]

  /// The bytes printed so far.
  private var printedSource: [UInt8] = []

  private init(mutations: [Int: TokenSpec]) {
    self.mutations = mutations
    super.init(viewMode: .sourceAccurate)
  }

  /// Prints `tree` by replacing the tokens whose offset is in `mutations` by
  /// a token that matches the corresponding `TokenSpec`.
  ///
  /// - Returns: The bytes of the printed (and possibly mutated) source.
  static func print(tree: Syntax, mutations: [Int: TokenSpec]) -> [UInt8] {
    let printer = MutatedTreePrinter(mutations: mutations)
    printer.walk(tree)
    return printer.printedSource
  }

  override func visit(_ node: TokenSyntax) -> SyntaxVisitorContinueKind {
    let tokenStart = node.positionAfterSkippingLeadingTrivia.utf8Offset

    // Pick the token to print: a synthesized replacement that keeps the
    // original token's trivia, or the original token itself.
    let emitted: TokenSyntax
    if let replacementSpec = mutations[tokenStart] {
      emitted = TokenSyntax(
        replacementSpec.synthesizedTokenKind,
        leadingTrivia: node.leadingTrivia,
        trailingTrivia: node.trailingTrivia,
        presence: .present
      )
    } else {
      emitted = node
    }

    printedSource += emitted.syntaxTextBytes
    return .skipChildren
  }
}
489+
457490
public struct AssertParseOptions: OptionSet {
458491
public var rawValue: UInt8
459492

@@ -497,38 +530,6 @@ func assertParse(
497530
)
498531
}
499532

500-
/// Same as `assertParse` overload with a `(String) -> S` `parse`,
501-
/// constructing a `Parser` from the given `String` and passing that to
502-
/// `parse` instead.
503-
func assertParse<S: SyntaxProtocol>(
504-
_ markedSource: String,
505-
_ parse: (inout Parser) -> S,
506-
substructure expectedSubstructure: Syntax? = nil,
507-
substructureAfterMarker: String = "START",
508-
diagnostics expectedDiagnostics: [DiagnosticSpec] = [],
509-
applyFixIts: [String]? = nil,
510-
fixedSource expectedFixedSource: String? = nil,
511-
options: AssertParseOptions = [],
512-
file: StaticString = #file,
513-
line: UInt = #line
514-
) {
515-
assertParse(
516-
markedSource,
517-
{ (source: String) -> S in
518-
var parser = Parser(source)
519-
return parse(&parser)
520-
},
521-
substructure: expectedSubstructure,
522-
substructureAfterMarker: substructureAfterMarker,
523-
diagnostics: expectedDiagnostics,
524-
applyFixIts: applyFixIts,
525-
fixedSource: expectedFixedSource,
526-
options: options,
527-
file: file,
528-
line: line
529-
)
530-
}
531-
532533
/// Removes any test markers from `markedSource` (1) and parses the result
533534
/// using `parse`. By default it only checks if the parsed syntax tree is
534535
/// printable back to the origin source, ie. it round trips.
@@ -549,7 +550,7 @@ func assertParse<S: SyntaxProtocol>(
549550
/// this string.
550551
func assertParse<S: SyntaxProtocol>(
551552
_ markedSource: String,
552-
_ parse: (String) -> S,
553+
_ parse: (inout Parser) -> S,
553554
substructure expectedSubstructure: Syntax? = nil,
554555
substructureAfterMarker: String = "START",
555556
diagnostics expectedDiagnostics: [DiagnosticSpec] = [],
@@ -563,7 +564,15 @@ func assertParse<S: SyntaxProtocol>(
563564
var (markerLocations, source) = extractMarkers(markedSource)
564565
markerLocations["START"] = 0
565566

566-
let tree: S = parse(source)
567+
var parser = Parser(source)
568+
#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
569+
let enableTestCaseMutation = ProcessInfo.processInfo.environment["SKIP_LONG_TESTS"] != "1"
570+
571+
if enableTestCaseMutation {
572+
parser.enableAlternativeTokenChoices()
573+
}
574+
#endif
575+
let tree: S = parse(&parser)
567576

568577
// Round-trip
569578
assertStringsEqualWithDiff(
@@ -623,4 +632,38 @@ func assertParse<S: SyntaxProtocol>(
623632
line: line
624633
)
625634
}
635+
636+
#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
637+
if enableTestCaseMutation {
638+
let mutations: [(offset: Int, replacement: TokenSpec)] = parser.alternativeTokenChoices.flatMap { offset, replacements in
639+
return replacements.map { (offset, $0) }
640+
}
641+
DispatchQueue.concurrentPerform(iterations: mutations.count) { index in
642+
let mutation = mutations[index]
643+
let alternateSource = MutatedTreePrinter.print(tree: Syntax(tree), mutations: [mutation.offset: mutation.replacement])
644+
alternateSource.withUnsafeBufferPointer { buf in
645+
let mutatedSource = String(decoding: buf, as: UTF8.self)
646+
// Check that we don't hit any assertions in the parser while parsing
647+
// the mutated source and that it round-trips
648+
var mutatedParser = Parser(buf)
649+
let mutatedTree = parse(&mutatedParser)
650+
assertStringsEqualWithDiff(
651+
"\(mutatedTree)",
652+
mutatedSource,
653+
additionalInfo: """
654+
Mutated source failed to round-trip.
655+
656+
Mutated source:
657+
\(mutatedSource)
658+
659+
Actual syntax tree:
660+
\(mutatedTree.debugDescription)
661+
""",
662+
file: file,
663+
line: line
664+
)
665+
}
666+
}
667+
}
668+
#endif
626669
}

0 commit comments

Comments
 (0)