Skip to content

Enable a mode in the parser in which it inspects alternative token choices to mutate test cases #1340

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@ if ProcessInfo.processInfo.environment["SWIFTSYNTAX_ENABLE_RAWSYNTAX_VALIDATION"
]
}

// Extra Swift compiler settings for the SwiftParser targets. Empty unless the
// `SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION` environment variable is
// set (to any value), in which case the compilation condition of the same
// name is defined.
var swiftParserSwiftSettings: [SwiftSetting] = []
if ProcessInfo.processInfo.environment["SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION"] != nil {
  swiftParserSwiftSettings.append(.define("SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION"))
}

let package = Package(
name: "SwiftSyntax",
platforms: [
Expand Down Expand Up @@ -170,12 +177,15 @@ let package = Package(
.target(
name: "SwiftParser",
dependencies: ["SwiftSyntax"],
exclude: ["CMakeLists.txt", "README.md"]
exclude: ["CMakeLists.txt", "README.md"],
swiftSettings: swiftParserSwiftSettings

),

.testTarget(
name: "SwiftParserTest",
dependencies: ["_SwiftSyntaxTestSupport", "SwiftDiagnostics", "SwiftOperators", "SwiftParser", "SwiftSyntaxBuilder"]
dependencies: ["_SwiftSyntaxTestSupport", "SwiftDiagnostics", "SwiftOperators", "SwiftParser", "SwiftSyntaxBuilder"],
swiftSettings: swiftParserSwiftSettings
),

// MARK: SwiftParserDiagnostics
Expand Down
14 changes: 14 additions & 0 deletions Sources/SwiftParser/Lexer/LexemeSequence.swift
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,20 @@ extension Lexer {
return remainingText
}
}

#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
/// If `pointer` is in the source buffer of this `LexemeSequence`, return
/// its offset from the start of that buffer, otherwise `nil`. Should only be
/// used for the parser's alternate token introspection.
///
/// The one-past-the-end position (`offset == count`) is accepted as being
/// inside the buffer. Pointers that lie before the start of the buffer, and
/// buffers without a base address, produce `nil`.
///
/// - Parameter pointer: The address to locate inside the source buffer.
/// - Returns: The distance in bytes from the buffer's base address to
///   `pointer`, or `nil` if `pointer` is outside the buffer.
func offset(of pointer: UnsafePointer<UInt8>) -> Int? {
  guard let bufferStart = self.sourceBufferStart.input.baseAddress else {
    // A buffer without a base address cannot contain any pointer.
    return nil
  }
  let offset = pointer - bufferStart
  // Reject pointers before the buffer (negative distance) as well as
  // pointers past its end.
  guard (0...self.sourceBufferStart.input.count).contains(offset) else {
    return nil
  }
  return offset
}
#endif
}

@_spi(RawSyntax)
Expand Down
6 changes: 6 additions & 0 deletions Sources/SwiftParser/Lookahead.swift
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,12 @@ extension Parser.Lookahead: TokenConsumer {
mutating func eat(_ spec: TokenSpec) -> Token {
return self.consume(if: spec)!
}

#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
/// Always `false` for this conformance, so the recording calls guarded by
/// this flag are skipped.
var shouldRecordAlternativeTokenChoices: Bool {
  return false
}

/// Required by `TokenConsumer`; this implementation does nothing with
/// `lexeme` and `choices`.
public mutating func recordAlternativeTokenChoice(for lexeme: Lexer.Lexeme, choices: [TokenSpec]) {
  // Intentionally empty.
}
#endif
}

extension Parser.Lookahead {
Expand Down
28 changes: 28 additions & 0 deletions Sources/SwiftParser/Parser.swift
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,34 @@ public struct Parser {
break
}
}

#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
/// Whether alternative token choices should be recorded. Starts out `false`
/// and is set to `true` by `enableAlternativeTokenChoices()`.
var shouldRecordAlternativeTokenChoices: Bool = false

/// Turns on the recording of alternative token choices.
public mutating func enableAlternativeTokenChoices() {
  self.shouldRecordAlternativeTokenChoices = true
}

/// When compiled with `SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION`, and
/// `shouldRecordAlternativeTokenChoices` is `true`, the parser records which
/// `TokenSpec`s it checked for a token at a specific offset in the source
/// file. The keys are the offsets of the token text's start (excluding
/// leading trivia); each value lists the specs recorded for that offset in
/// recording order, without deduplication.
///
/// This information allows testing techniques to replace tokens by these
/// alternate token choices to generate new, interesting test cases.
@_spi(RawSyntax)
public var alternativeTokenChoices: [Int: [TokenSpec]] = [:]

/// Appends `choices` to the alternatives recorded at the offset of `lexeme`'s
/// token text.
///
/// Nothing is recorded when the token text has no base address or when
/// `lexemes.offset(of:)` returns `nil` for it.
mutating func recordAlternativeTokenChoice(for lexeme: Lexer.Lexeme, choices: [TokenSpec]) {
  guard let tokenStart = lexeme.tokenText.baseAddress else {
    return
  }
  guard let tokenOffset = lexemes.offset(of: tokenStart) else {
    return
  }
  alternativeTokenChoices[tokenOffset, default: []] += choices
}
#endif
}

// MARK: Inspecting Tokens
Expand Down
10 changes: 10 additions & 0 deletions Sources/SwiftParser/Recovery.swift
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ extension Parser.Lookahead {
_ spec2: TokenSpec,
_ spec3: TokenSpec
) -> RecoveryConsumptionHandle? {
#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
if shouldRecordAlternativeTokenChoices {
recordAlternativeTokenChoice(for: self.currentToken, choices: [spec1, spec2, spec3])
}
#endif
let initialTokensConsumed = self.tokensConsumed

let recoveryPrecedence = min(spec1.recoveryPrecedence, spec2.recoveryPrecedence, spec3.recoveryPrecedence)
Expand Down Expand Up @@ -119,6 +124,11 @@ extension Parser.Lookahead {
anyIn specSet: SpecSet.Type,
overrideRecoveryPrecedence: TokenPrecedence? = nil
) -> (match: SpecSet, handle: RecoveryConsumptionHandle)? {
#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
if shouldRecordAlternativeTokenChoices {
recordAlternativeTokenChoice(for: self.currentToken, choices: specSet.allCases.map(\.spec))
}
#endif
let initialTokensConsumed = self.tokensConsumed

precondition(!specSet.allCases.isEmpty, "SpecSet must have at least one case")
Expand Down
38 changes: 36 additions & 2 deletions Sources/SwiftParser/TokenConsumer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@
@_spi(RawSyntax) import SwiftSyntax

/// A type that consumes instances of `TokenSyntax`.
@_spi(RawSyntax)
public protocol TokenConsumer {
protocol TokenConsumer {
associatedtype Token
/// The current token syntax being examined by the consumer
var currentToken: Lexer.Lexeme { get }
Expand All @@ -32,6 +31,21 @@ public protocol TokenConsumer {
func peek() -> Lexer.Lexeme

func lookahead() -> Parser.Lookahead

#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
var shouldRecordAlternativeTokenChoices: Bool { get }

/// When compiled with `SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION`,
/// record alternative tokens that the parser was looking for at the offset of
/// `lexeme`.
///
/// E.g. if at offset 33, we issue an `at(.leftParen)` call, this will record
/// that `.leftParen` is an interesting token at offset 33. This allows the
/// test case mutators to prefer replacing the current token at offset 33 by a
/// left paren, because apparently this would be a code path that the parser
/// is interested in.
mutating func recordAlternativeTokenChoice(for lexeme: Lexer.Lexeme, choices: [TokenSpec])
#endif
}

// MARK: Checking if we are at one specific token (`at`)
Expand All @@ -51,6 +65,11 @@ extension TokenConsumer {
/// Returns whether the current token matches `spec`.
///
/// When compiled with `SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION` and
/// `shouldRecordAlternativeTokenChoices` is `true`, `spec` is first recorded
/// as an alternative choice for the current token.
@inline(__always)
mutating func at(_ spec: TokenSpec) -> Bool {
  #if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
  if shouldRecordAlternativeTokenChoices {
    recordAlternativeTokenChoice(for: currentToken, choices: [spec])
  }
  #endif
  let isMatch = spec ~= currentToken
  return isMatch
}

Expand All @@ -60,6 +79,11 @@ extension TokenConsumer {
_ spec1: TokenSpec,
_ spec2: TokenSpec
) -> Bool {
#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
if shouldRecordAlternativeTokenChoices {
recordAlternativeTokenChoice(for: self.currentToken, choices: [spec1, spec2])
}
#endif
switch self.currentToken {
case spec1: return true
case spec2: return true
Expand All @@ -74,6 +98,11 @@ extension TokenConsumer {
_ spec2: TokenSpec,
_ spec3: TokenSpec
) -> Bool {
#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
if shouldRecordAlternativeTokenChoices {
recordAlternativeTokenChoice(for: self.currentToken, choices: [spec1, spec2, spec3])
}
#endif
switch self.currentToken {
case spec1: return true
case spec2: return true
Expand All @@ -93,6 +122,11 @@ extension TokenConsumer {
/// as well as a handle to consume that token.
@inline(__always)
mutating func at<SpecSet: TokenSpecSet>(anyIn specSet: SpecSet.Type) -> (SpecSet, TokenConsumptionHandle)? {
#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
if shouldRecordAlternativeTokenChoices {
recordAlternativeTokenChoice(for: self.currentToken, choices: specSet.allCases.map(\.spec))
}
#endif
if let matchedKind = SpecSet(lexeme: self.currentToken) {
precondition(matchedKind.spec ~= self.currentToken)
return (
Expand Down
26 changes: 25 additions & 1 deletion Sources/SwiftParser/TokenSpec.swift
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ struct PrepareForKeywordMatch {
/// marked `@inline(__always)` so the compiler inlines the `RawTokenKind` we are
/// matching against and is thus able to rule out one of the branches in
/// `matches(rawTokenKind:text:)` based on the matched kind.
struct TokenSpec {
@_spi(RawSyntax)
public struct TokenSpec {
/// The kind we expect the token that we want to consume to have.
/// This can be a keyword, in which case the `TokenSpec` will also match an
/// identifier with the same text as the keyword and remap it to that keyword
Expand Down Expand Up @@ -160,6 +161,29 @@ struct TokenSpec {
atStartOfLine: lexeme.isAtStartOfLine
)
}

/// A `TokenKind` that will most likely be parsed as a token matching this
/// `TokenSpec`.
///
/// Raw kinds listed explicitly below get a fixed placeholder text (or, for
/// keywords, the spec's keyword). Every other raw kind is converted with
/// `TokenKind.fromRaw` using empty text.
///
/// IMPORTANT: Should only be used when generating tokens during the
/// modification of test cases. This should never be used in the parser itself.
public var synthesizedTokenKind: TokenKind {
  switch rawTokenKind {
  case .binaryOperator:
    return .binaryOperator("+")
  case .dollarIdentifier:
    return .dollarIdentifier("$0")
  case .extendedRegexDelimiter:
    return .extendedRegexDelimiter("#")
  case .floatingLiteral:
    return .floatingLiteral("1.0")
  case .identifier:
    return .identifier("myIdent")
  case .integerLiteral:
    return .integerLiteral("1")
  case .keyword:
    // NOTE(review): presumably `keyword` is always set when the raw kind is
    // `.keyword`; the force-unwrap traps otherwise.
    return .keyword(keyword!)
  case .postfixOperator:
    return .postfixOperator("++")
  case .prefixOperator:
    return .prefixOperator("!")
  case .rawStringDelimiter:
    return .rawStringDelimiter("#")
  case .regexLiteralPattern:
    return .regexLiteralPattern(".*")
  case .stringSegment:
    return .stringSegment("abc")
  default:
    return TokenKind.fromRaw(kind: rawTokenKind, text: "")
  }
}
}

extension TokenConsumer {
Expand Down
111 changes: 77 additions & 34 deletions Tests/SwiftParserTest/Assertions.swift
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,39 @@ func assertDiagnostic<T: SyntaxProtocol>(
}
}

/// A syntax visitor that re-prints a tree as bytes while swapping selected
/// tokens for synthesized replacements.
class MutatedTreePrinter: SyntaxVisitor {
  /// Replacement specs, keyed by the UTF-8 offset at which a token's text
  /// starts (after its leading trivia).
  private let mutations: [Int: TokenSpec]

  /// Bytes emitted so far, in visiting order.
  private var printedSource: [UInt8] = []

  /// Prints `tree` by replacing the tokens whose offset is in `mutations` by
  /// a token that matches the corresponding `TokenSpec`.
  ///
  /// - Returns: The bytes of the printed (mutated) source.
  static func print(tree: Syntax, mutations: [Int: TokenSpec]) -> [UInt8] {
    let visitor = MutatedTreePrinter(mutations: mutations)
    visitor.walk(tree)
    return visitor.printedSource
  }

  private init(mutations: [Int: TokenSpec]) {
    self.mutations = mutations
    super.init(viewMode: .sourceAccurate)
  }

  /// Emits either `node` itself or, if a mutation is registered at its
  /// offset, a synthesized token that keeps `node`'s leading and trailing
  /// trivia.
  override func visit(_ node: TokenSyntax) -> SyntaxVisitorContinueKind {
    let tokenStart = node.positionAfterSkippingLeadingTrivia.utf8Offset
    let tokenToPrint: TokenSyntax
    if let replacementSpec = mutations[tokenStart] {
      tokenToPrint = TokenSyntax(
        replacementSpec.synthesizedTokenKind,
        leadingTrivia: node.leadingTrivia,
        trailingTrivia: node.trailingTrivia,
        presence: .present
      )
    } else {
      tokenToPrint = node
    }
    printedSource.append(contentsOf: tokenToPrint.syntaxTextBytes)
    return .skipChildren
  }
}

public struct AssertParseOptions: OptionSet {
public var rawValue: UInt8

Expand Down Expand Up @@ -497,38 +530,6 @@ func assertParse(
)
}

/// Same as `assertParse` overload with a `(String) -> S` `parse`,
/// constructing a `Parser` from the given `String` and passing that to
/// `parse` instead.
///
/// Every argument other than `parse` is forwarded unchanged to that overload.
///
/// - Parameter parse: Produces the syntax tree from a `Parser` that was
///   created for the source string handed to the wrapping closure.
func assertParse<S: SyntaxProtocol>(
_ markedSource: String,
_ parse: (inout Parser) -> S,
substructure expectedSubstructure: Syntax? = nil,
substructureAfterMarker: String = "START",
diagnostics expectedDiagnostics: [DiagnosticSpec] = [],
applyFixIts: [String]? = nil,
fixedSource expectedFixedSource: String? = nil,
options: AssertParseOptions = [],
file: StaticString = #file,
line: UInt = #line
) {
assertParse(
markedSource,
// Adapt the `(inout Parser) -> S` closure to the `(String) -> S` shape by
// building a fresh parser for whatever source string is passed in.
{ (source: String) -> S in
var parser = Parser(source)
return parse(&parser)
},
substructure: expectedSubstructure,
substructureAfterMarker: substructureAfterMarker,
diagnostics: expectedDiagnostics,
applyFixIts: applyFixIts,
fixedSource: expectedFixedSource,
options: options,
// Pass the caller's location through so failures are reported at the
// original call site.
file: file,
line: line
)
}

/// Removes any test markers from `markedSource` (1) and parses the result
/// using `parse`. By default it only checks if the parsed syntax tree is
/// printable back to the origin source, ie. it round trips.
Expand All @@ -549,7 +550,7 @@ func assertParse<S: SyntaxProtocol>(
/// this string.
func assertParse<S: SyntaxProtocol>(
_ markedSource: String,
_ parse: (String) -> S,
_ parse: (inout Parser) -> S,
substructure expectedSubstructure: Syntax? = nil,
substructureAfterMarker: String = "START",
diagnostics expectedDiagnostics: [DiagnosticSpec] = [],
Expand All @@ -563,7 +564,15 @@ func assertParse<S: SyntaxProtocol>(
var (markerLocations, source) = extractMarkers(markedSource)
markerLocations["START"] = 0

let tree: S = parse(source)
var parser = Parser(source)
#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
let enableTestCaseMutation = ProcessInfo.processInfo.environment["SKIP_LONG_TESTS"] != "1"

if enableTestCaseMutation {
parser.enableAlternativeTokenChoices()
}
#endif
let tree: S = parse(&parser)

// Round-trip
assertStringsEqualWithDiff(
Expand Down Expand Up @@ -623,4 +632,38 @@ func assertParse<S: SyntaxProtocol>(
line: line
)
}

#if SWIFTPARSER_ENABLE_ALTERNATE_TOKEN_INTROSPECTION
if enableTestCaseMutation {
let mutations: [(offset: Int, replacement: TokenSpec)] = parser.alternativeTokenChoices.flatMap { offset, replacements in
return replacements.map { (offset, $0) }
}
DispatchQueue.concurrentPerform(iterations: mutations.count) { index in
let mutation = mutations[index]
let alternateSource = MutatedTreePrinter.print(tree: Syntax(tree), mutations: [mutation.offset: mutation.replacement])
alternateSource.withUnsafeBufferPointer { buf in
let mutatedSource = String(decoding: buf, as: UTF8.self)
// Check that we don't hit any assertions in the parser while parsing
// the mutated source and that it round-trips
var mutatedParser = Parser(buf)
let mutatedTree = parse(&mutatedParser)
assertStringsEqualWithDiff(
"\(mutatedTree)",
mutatedSource,
additionalInfo: """
Mutated source failed to round-trip.

Mutated source:
\(mutatedSource)

Actual syntax tree:
\(mutatedTree.debugDescription)
""",
file: file,
line: line
)
}
}
}
#endif
}
Loading