Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,9 @@ errors:
- PATTERN_TERM_PAREN
- PIPEPIPEEQ_MULTI_ASSIGN
- REGEXP_ENCODING_OPTION_MISMATCH
- REGEXP_ESCAPED_NON_ASCII_IN_UTF8
- REGEXP_INCOMPAT_CHAR_ENCODING
- REGEXP_INVALID_CHAR_PROPERTY
- REGEXP_INVALID_UNICODE_RANGE
- REGEXP_NON_ESCAPED_MBC
- REGEXP_PARSE_ERROR
Expand Down
6 changes: 0 additions & 6 deletions include/prism/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -933,12 +933,6 @@ struct pm_parser {
*/
bool semantic_token_seen;

/**
* True if the current regular expression being lexed contains only ASCII
* characters.
*/
bool current_regular_expression_ascii_only;

/**
* By default, Ruby always warns about mismatched indentation. This can be
* toggled with a magic comment.
Expand Down
53 changes: 41 additions & 12 deletions include/prism/regexp.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,27 +17,56 @@
#include <string.h>

/**
* This callback is called by pm_regexp_parse() when a named capture group is found.
* Accumulation state for named capture groups found during regexp parsing.
* The caller initializes this with the call node and passes it to
* pm_regexp_parse. The regexp parser populates match and names as groups
* are found.
*/
typedef void (*pm_regexp_name_callback_t)(const pm_string_t *name, void *data);
typedef struct {
/** The call node wrapping the regular expression node (for =~). */
pm_call_node_t *call;

/** The match write node being built, or NULL if no captures found yet. */
pm_match_write_node_t *match;

/** The list of capture names found so far (for deduplication). */
pm_constant_id_list_t names;
} pm_regexp_name_data_t;

/**
* This callback is called by pm_regexp_parse() when a parse error is found.
* Callback invoked by pm_regexp_parse() for each named capture group found.
*
* @param parser The main parser.
* @param name The name of the capture group.
* @param shared Whether the source content is shared (impacts constant storage).
* @param data The accumulation state for named captures.
*/
typedef void (*pm_regexp_error_callback_t)(const uint8_t *start, const uint8_t *end, const char *message, void *data);
typedef void (*pm_regexp_name_callback_t)(pm_parser_t *parser, const pm_string_t *name, bool shared, pm_regexp_name_data_t *data);

/**
* Parse a regular expression.
* Parse a regular expression, validate its encoding, and optionally extract
* named capture groups. Returns the encoding flags to set on the node.
*
* @param parser The parser that is currently being used.
* @param source The source code to parse.
* @param size The size of the source code.
* @param extended_mode Whether to parse the regular expression in extended mode.
* @param node The regular expression node to parse and validate.
* @param name_callback The optional callback to call when a named capture group is found.
* @param name_data The optional data to pass to the name callback.
* @param error_callback The callback to call when a parse error is found.
* @param error_data The data to pass to the error callback.
* @param name_data The optional accumulation state for named captures.
* @return The encoding flags to set on the node (e.g., FORCED_UTF8_ENCODING).
*/
PRISM_EXPORTED_FUNCTION pm_node_flags_t pm_regexp_parse(pm_parser_t *parser, pm_regular_expression_node_t *node, pm_regexp_name_callback_t name_callback, pm_regexp_name_data_t *name_data);

/**
* Parse an interpolated regular expression for named capture groups only.
* No encoding validation is performed.
*
* @param parser The parser that is currently being used.
* @param source The source content to parse.
* @param size The length of the source content.
* @param shared Whether the source points into the parser's source buffer.
* @param extended_mode Whether or not the regular expression is in extended mode.
* @param name_callback The callback to call when a named capture group is found.
* @param name_data The accumulation state for named captures.
*/
PRISM_EXPORTED_FUNCTION void pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, bool extended_mode, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data);
void pm_regexp_parse_named_captures(pm_parser_t *parser, const uint8_t *source, size_t size, bool shared, bool extended_mode, pm_regexp_name_callback_t name_callback, pm_regexp_name_data_t *name_data);

#endif
2 changes: 1 addition & 1 deletion snapshots/seattlerb/regexp_escape_extended.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
├── flags: ∅
└── body: (length: 1)
└── @ RegularExpressionNode (location: (1,0)-(1,6))
├── flags: newline, static_literal
├── flags: newline, static_literal, forced_us_ascii_encoding
├── opening_loc: (1,0)-(1,1) = "/"
├── content_loc: (1,1)-(1,5) = "\\“"
├── closing_loc: (1,5)-(1,6) = "/"
Expand Down
Loading
Loading