|
| 1 | +//===----------------------------------------------------------------------===// |
| 2 | +// |
| 3 | +// This source file is part of the Swift.org open source project |
| 4 | +// |
| 5 | +// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors |
| 6 | +// Licensed under Apache License v2.0 with Runtime Library Exception |
| 7 | +// |
| 8 | +// See https://swift.org/LICENSE.txt for license information |
| 9 | +// |
| 10 | +//===----------------------------------------------------------------------===// |
| 11 | + |
/// The format version of emitted regex literal strings.
///
/// This value is emitted as an argument to the
/// `Regex(_regexString:version:)` initializer. Bump it whenever the format of
/// the regex string changes in a way that requires the runtime to be updated.
public let currentRegexLiteralFormatVersion: Int = 1
| 17 | + |
/// An error produced while lexing a regex literal on behalf of the compiler.
@_spi(CompilerInterface)
public struct CompilerLexError: Error {
  /// The diagnostic text to emit for this error.
  public var message: String

  /// The position in the source buffer at which the error is reported.
  public var location: UnsafeRawPointer

  /// `false` when the lexer can recover from the error; `true` when there is
  /// currently no good recovery behavior for it.
  public var completelyErroneous: Bool
}
| 24 | + |
/// Interface for the Swift compiler.
///
/// Attempts to lex a regex literal starting at `start`.
///
/// - Parameters:
///   - start: The pointer at which to start lexing the literal.
///   - bufferEnd: A pointer to the end of the buffer; lexing never proceeds
///     past it.
///   - mustBeRegex: Whether a regex literal is expected here. When `false`,
///     a literal is only lexed if doing so produces no error, so that the
///     caller can fall back to lexing something else.
///
/// - Returns: `nil` if no regex literal was lexed. This happens for any
///   delimiter lexing error when `mustBeRegex` is `false`, and for an unknown
///   delimiter regardless of `mustBeRegex`. Otherwise, `resumePtr` is the
///   position at which the caller should resume lexing, and `error` is the
///   lexing error to emit, or `nil` if the literal was lexed successfully.
@_spi(CompilerInterface)
public func swiftCompilerLexRegexLiteral(
  start: UnsafeRawPointer, bufferEnd: UnsafeRawPointer, mustBeRegex: Bool
) -> (resumePtr: UnsafeRawPointer, error: CompilerLexError?)? {
  do {
    let (_, _, literalEnd) = try lexRegex(start: start, end: bufferEnd)
    return (resumePtr: literalEnd, error: nil)
  } catch let lexError as DelimiterLexError {
    // When a regex is not required, the token may be something else entirely;
    // let the client fall back.
    guard mustBeRegex else { return nil }

    let isCompletelyErroneous: Bool
    switch lexError.kind {
    case .unknownDelimiter:
      // An unknown delimiter should be recovered from, as the caller may want
      // to try lexing something else.
      return nil
    case .unprintableASCII, .invalidUTF8:
      // There is currently no good recovery behavior for these.
      isCompletelyErroneous = true
    case .unterminated, .multilineClosingNotOnNewline:
      // These can be recovered from.
      isCompletelyErroneous = false
    }

    // For now every lexer error is emitted at the starting delimiter.
    return (
      resumePtr: lexError.resumePtr,
      error: CompilerLexError(
        message: "\(lexError)",
        location: start,
        completelyErroneous: isCompletelyErroneous
      )
    )
  } catch {
    fatalError("Should be a DelimiterLexError")
  }
}
| 76 | + |
/// An error produced while parsing a regex literal on behalf of the compiler.
@_spi(CompilerInterface)
public struct CompilerParseError: Error {
  /// The diagnostic text to emit for this error.
  public var message: String

  /// The start of the source range the underlying error was located at, or
  /// `nil` if the underlying error carries no location.
  // NOTE(review): presumably an index into the parsed input string — confirm.
  public var location: String.Index?
}
| 82 | + |
/// Interface for the Swift compiler.
///
/// Attempts to parse a regex literal string.
///
/// - Parameters:
///   - input: The regex input string, including delimiters.
///   - captureBufferOut: A buffer into which the captures of the regex are
///     encoded upon a successful parse. It is expected to hold at least
///     `input.utf8.count` bytes (checked by an assertion).
///
/// - Returns: The string to emit along with its format version number.
/// - Throws: `CompilerParseError` if there was a parsing error.
@_spi(CompilerInterface)
public func swiftCompilerParseRegexLiteral(
  _ input: String, captureBufferOut: UnsafeMutableRawBufferPointer
) throws -> (regexToEmit: String, version: Int) {
  do {
    let parsed = try parseWithDelimiters(input)

    // Serialize the capture structure for later type inference.
    assert(captureBufferOut.count >= input.utf8.count)
    parsed.captureStructure.encode(to: captureBufferOut)

    // The input itself is currently what gets emitted. That may change if
    // syntax ever needs to be back-deployed to something the matching engine
    // already understands, or if the format otherwise changes; doing so would
    // need plumbing through on the compiler side.
    return (regexToEmit: input, version: currentRegexLiteralFormatVersion)
  } catch {
    // Only located errors can report a position; others yield `nil`.
    let located = error as? LocatedErrorProtocol
    throw CompilerParseError(
      message: "cannot parse regular expression: \(String(describing: error))",
      location: located?.location.start
    )
  }
}
0 commit comments