diff --git a/readme.md b/readme.md index e4e3c9e..0a69a3c 100644 --- a/readme.md +++ b/readme.md @@ -60,6 +60,27 @@ foreach ($parser->parseFileStream($stream) as $query) { Available parsers: `MySqlMultiQueryParser`, `PostgreSqlMultiQueryParser`, `SqlServerMultiQueryParser`, `SqliteMultiQueryParser`. +**Keep leading comments:** + +By default, comments are stripped and only query strings are yielded. To control what happens to +comments, pass a `CommentStrategy` to the parser constructor. The bundled `PrependLeadingComments` +strategy keeps the comments preceding a query as a prefix of that query -- useful when comments +carry meaningful annotations, e.g. so they remain visible in observability tools: + +```php +use Nextras\MultiQueryParser\Strategy\PrependLeadingComments; + +$parser = new MySqlMultiQueryParser(new PrependLeadingComments()); + +$sql = "-- create the users table\nCREATE TABLE users (id INT);"; + +foreach ($parser->parseString($sql) as $query) { + echo $query; // "-- create the users table\nCREATE TABLE users (id INT)" +} +``` + +All comment styles supported by the given dialect (`--`, `/* */`, and `#` for MySQL) that directly precede a query are preserved with their original formatting; only pure leading whitespace is stripped. A comment that sits between two queries is treated as preceding the following one. Comments not followed by any query (e.g. a trailing comment at the end of input) are dropped. + ### License MIT. See full [license](license.md). diff --git a/src/BaseMultiQueryParser.php b/src/BaseMultiQueryParser.php index f5314ac..3f5735b 100644 --- a/src/BaseMultiQueryParser.php +++ b/src/BaseMultiQueryParser.php @@ -5,6 +5,10 @@ use ArrayIterator; use Iterator; use Nextras\MultiQueryParser\Exception\RuntimeException; +use Nextras\MultiQueryParser\Fragment\Comment; +use Nextras\MultiQueryParser\Fragment\Fragment; +use Nextras\MultiQueryParser\Fragment\Query; +use Nextras\MultiQueryParser\Strategy\DropComments; use function feof; use function fopen; use function fread; @@ -12,6 +16,15 @@ abstract class BaseMultiQueryParser implements IMultiQueryParser { + private CommentStrategy $commentStrategy; + + + public function __construct(?CommentStrategy $commentStrategy = null) + { + $this->commentStrategy = $commentStrategy ?? new DropComments(); + } + + /** * @param positive-int $chunkSize * @return Iterator @@ -52,7 +65,32 @@ public function parseString(string $s): Iterator * @param Iterator $stream * @return Iterator */ - abstract public function parseStringStream(Iterator $stream): Iterator; + public function parseStringStream(Iterator $stream): Iterator + { + return $this->commentStrategy->apply($this->parseStringStreamToFragments($stream)); + } + + + /** + * @param Iterator $stream + * @return Iterator + */ + abstract protected function parseStringStreamToFragments(Iterator $stream): Iterator; + + + /** + * @return Iterator + */ + protected function buildFragments(?string $leadingComments, ?string $query): Iterator + { + if ($leadingComments !== null && $leadingComments !== '') { + yield new Comment($leadingComments); + } + + if ($query !== null && $query !== '') { + yield new Query($query); + } + } /** diff --git a/src/CommentStrategy.php b/src/CommentStrategy.php new file mode 100644 index 0000000..92dbb7a --- /dev/null +++ b/src/CommentStrategy.php @@ -0,0 +1,22 @@ + $fragments + * @return Iterator + */ + public function apply(Iterator $fragments): Iterator; +} diff --git a/src/Fragment/Comment.php b/src/Fragment/Comment.php new file mode 100644 index 0000000..a9d395a --- /dev/null +++ b/src/Fragment/Comment.php @@ -0,0 +1,15 @@ +getQueryPattern(';')); foreach ($patternIterator as $match) { + yield from $this->buildFragments($match['leadingComments'] ?? null, $match['query'] ?? null); + if (isset($match['delimiter']) && $match['delimiter'] !== '') { $patternIterator->setPattern($this->getQueryPattern($match['delimiter'])); - - } elseif (isset($match['query']) && $match['query'] !== '') { - yield $match['query']; } } } @@ -30,12 +29,15 @@ private function getQueryPattern(string $delimiter): string return /** @lang PhpRegExp */ " ~ - (?: - \\s - | /\\* (*PRUNE) (?: [^*]++ | \\*(?!/) )*+ \\*/ - | --[^\\n]*+(?:\\n|\\z) - | \\#[^\\n]*+(?:\\n|\\z) - )*+ + \\s*+ + (? + (?: + \\s + | /\\* (*PRUNE) (?: [^*]++ | \\*(?!/) )*+ \\*/ + | --[^\\n]*+(?:\\n|\\z) + | \\#[^\\n]*+(?:\\n|\\z) + )*+ + ) (?: (?i: diff --git a/src/PatternIterator.php b/src/PatternIterator.php index 788239c..374b892 100644 --- a/src/PatternIterator.php +++ b/src/PatternIterator.php @@ -30,7 +30,7 @@ * the regex engine commits to the construct — if the closing delimiter is missing (because * it is in a later chunk), the overall match fails, causing the iterator to load more data. * - * @implements IteratorAggregate> + * @implements IteratorAggregate> */ class PatternIterator implements IteratorAggregate { diff --git a/src/PostgreSqlMultiQueryParser.php b/src/PostgreSqlMultiQueryParser.php index 4fd33c3..5edaae4 100644 --- a/src/PostgreSqlMultiQueryParser.php +++ b/src/PostgreSqlMultiQueryParser.php @@ -7,14 +7,12 @@ class PostgreSqlMultiQueryParser extends BaseMultiQueryParser { - public function parseStringStream(Iterator $stream): Iterator + protected function parseStringStreamToFragments(Iterator $stream): Iterator { $patternIterator = new PatternIterator($stream, $this->getQueryPattern()); foreach ($patternIterator as $match) { - if (isset($match['query']) && $match['query'] !== '') { - yield $match['query']; - } + yield from $this->buildFragments($match['leadingComments'] ?? null, $match['query'] ?? null); } } @@ -29,11 +27,14 @@ private function getQueryPattern(): string (? /\\* (?: [^/*]++ | /(?!\\*) | \\*(?!/) | (?&nestedBc) )*+ \\*/ ) ) - (?: - \\s - | /\\* (*PRUNE) (?: [^/*]++ | /(?!\\*) | \\*(?!/) | (?&nestedBc) )*+ \\*/ - | -- [^\\n]*+ - )*+ + \\s*+ + (? + (?: + \\s + | /\\* (*PRUNE) (?: [^/*]++ | /(?!\\*) | \\*(?!/) | (?&nestedBc) )*+ \\*/ + | -- [^\\n]*+ + )*+ + ) (?: (?: diff --git a/src/SqlServerMultiQueryParser.php b/src/SqlServerMultiQueryParser.php index 2624d9f..09e5b6d 100644 --- a/src/SqlServerMultiQueryParser.php +++ b/src/SqlServerMultiQueryParser.php @@ -7,14 +7,12 @@ class SqlServerMultiQueryParser extends BaseMultiQueryParser { - public function parseStringStream(Iterator $stream): Iterator + protected function parseStringStreamToFragments(Iterator $stream): Iterator { $patternIterator = new PatternIterator($stream, $this->getQueryPattern()); foreach ($patternIterator as $match) { - if (isset($match['query']) && $match['query'] !== '') { - yield $match['query']; - } + yield from $this->buildFragments($match['leadingComments'] ?? null, $match['query'] ?? null); } } @@ -45,11 +43,14 @@ private function getQueryPattern(): string (? /\\* (?: [^/*]++ | /(?!\\*) | \\*(?!/) | (?&nestedBc) )*+ \\*/ ) ) - (?: - \\s - | /\\* (*PRUNE) (?: [^/*]++ | /(?!\\*) | \\*(?!/) | (?&nestedBc) )*+ \\*/ - | -- [^\\n]*+ - )*+ + \\s*+ + (? + (?: + \\s + | /\\* (*PRUNE) (?: [^/*]++ | /(?!\\*) | \\*(?!/) | (?&nestedBc) )*+ \\*/ + | -- [^\\n]*+ + )*+ + ) (?: (?: diff --git a/src/SqliteMultiQueryParser.php b/src/SqliteMultiQueryParser.php index f8bdf82..e248dae 100644 --- a/src/SqliteMultiQueryParser.php +++ b/src/SqliteMultiQueryParser.php @@ -7,14 +7,12 @@ class SqliteMultiQueryParser extends BaseMultiQueryParser { - public function parseStringStream(Iterator $stream): Iterator + protected function parseStringStreamToFragments(Iterator $stream): Iterator { $patternIterator = new PatternIterator($stream, $this->getQueryPattern()); foreach ($patternIterator as $match) { - if (isset($match['query']) && $match['query'] !== '') { - yield $match['query']; - } + yield from $this->buildFragments($match['leadingComments'] ?? null, $match['query'] ?? null); } } @@ -55,7 +53,8 @@ private function getQueryPattern(): string ) ) - (?&skip) + \s*+ + (? (?&skip) ) (?: (?: diff --git a/src/Strategy/DropComments.php b/src/Strategy/DropComments.php new file mode 100644 index 0000000..e95d541 --- /dev/null +++ b/src/Strategy/DropComments.php @@ -0,0 +1,20 @@ +sql; + } + } + } +} diff --git a/src/Strategy/PrependLeadingComments.php b/src/Strategy/PrependLeadingComments.php new file mode 100644 index 0000000..0d23cca --- /dev/null +++ b/src/Strategy/PrependLeadingComments.php @@ -0,0 +1,33 @@ +text; + + } elseif ($fragment instanceof Query) { + yield $leadingComments . $fragment->sql; + $leadingComments = ''; + } + } + } +} diff --git a/tests/cases/CommentStrategyTest.phpt b/tests/cases/CommentStrategyTest.phpt new file mode 100644 index 0000000..5274147 --- /dev/null +++ b/tests/cases/CommentStrategyTest.phpt @@ -0,0 +1,84 @@ +apply(new DropComments(), [ + new Comment('-- a'), + new Query('SELECT 1'), + new Comment('-- b'), + new Query('SELECT 2'), + new Comment('-- trailing'), + ]); + + Assert::same(['SELECT 1', 'SELECT 2'], $result); + } + + + public function testPrependLeadingComments(): void + { + $result = $this->apply(new PrependLeadingComments(), [ + new Comment("-- a\n"), + new Query('SELECT 1'), + new Comment("-- b\n"), + new Query('SELECT 2'), + ]); + + Assert::same(["-- a\nSELECT 1", "-- b\nSELECT 2"], $result); + } + + + public function testPrependLeadingCommentsWithoutComments(): void + { + $result = $this->apply(new PrependLeadingComments(), [ + new Query('SELECT 1'), + new Query('SELECT 2'), + ]); + + Assert::same(['SELECT 1', 'SELECT 2'], $result); + } + + + public function testPrependLeadingCommentsDropsTrailingComment(): void + { + $result = $this->apply(new PrependLeadingComments(), [ + new Query('SELECT 1'), + new Comment('-- trailing'), + ]); + + Assert::same(['SELECT 1'], $result); + } + + + /** + * @param list $fragments + * @return list + */ + private function apply(CommentStrategy $strategy, array $fragments): array + { + return iterator_to_array($strategy->apply(new ArrayIterator($fragments)), false); + } +} + + +(new CommentStrategyTest())->run(); diff --git a/tests/cases/MySqlMultiQueryParserTest.phpt b/tests/cases/MySqlMultiQueryParserTest.phpt index 3ffd3c7..03760a0 100644 --- a/tests/cases/MySqlMultiQueryParserTest.phpt +++ b/tests/cases/MySqlMultiQueryParserTest.phpt @@ -6,6 +6,9 @@ namespace Nextras\MultiQueryParser; +use Nextras\MultiQueryParser\Fragment\Comment; +use Nextras\MultiQueryParser\Fragment\Query; +use Nextras\MultiQueryParser\Strategy\PrependLeadingComments; use Tester\Assert; @@ -15,9 +18,9 @@ require_once __DIR__ . '/../inc/MultiQueryParserTestCase.php'; class MySqlMultiQueryParserTest extends MultiQueryParserTestCase { - protected function createParser(): IMultiQueryParser + protected function createParser(?CommentStrategy $commentStrategy = null): IMultiQueryParser { - return new MySqlMultiQueryParser(); + return new MySqlMultiQueryParser($commentStrategy); } @@ -45,6 +48,75 @@ class MySqlMultiQueryParserTest extends MultiQueryParserTestCase } + /** + * MySQL-specific leading-comment cases: # hash comments. The generic line- and + * block-comment cases are covered by the shared test in MultiQueryParserTestCase. + * + * @dataProvider providePreserveLeadingCommentsHashData + * @param list $expectedQueries + */ + public function testPreserveLeadingCommentsHash(string $content, array $expectedQueries): void + { + $parser = $this->createParser(new PrependLeadingComments()); + $queries = iterator_to_array($parser->parseString($content)); + Assert::same($expectedQueries, $queries); + } + + + /** + * @return list}> + */ + protected function providePreserveLeadingCommentsHashData(): array + { + return [ + // # hash comments are preserved as a prefix + [ + "# hash note\nSELECT 1;", + ["# hash note\nSELECT 1"], + ], + // All three comment styles mixed, with original formatting preserved + [ + "-- a\n# b\n/* c */\nSELECT 1;", + ["-- a\n# b\n/* c */\nSELECT 1"], + ], + // A hash comment between two queries attaches to the following query + [ + "SELECT 1; # between\nSELECT 2;", + ["SELECT 1", "# between\nSELECT 2"], + ], + // Hash-comment-only input yields nothing + ["# only a comment", []], + ]; + } + + + /** + * A comment preceding a DELIMITER directive must still be emitted as a Comment fragment, so + * that a strategy can attach it to the following query instead of the parser dropping it. + */ + public function testCommentBeforeDelimiterIsEmittedAsFragment(): void + { + $content = "-- before delimiter\nDELIMITER //\nSELECT 1//"; + + $fragments = $this->collectFragments($content); + Assert::count(2, $fragments); + + $comment = $fragments[0]; + Assert::type(Comment::class, $comment); + assert($comment instanceof Comment); + Assert::same("-- before delimiter\n", $comment->text); + + $query = $fragments[1]; + Assert::type(Query::class, $query); + assert($query instanceof Query); + Assert::same('SELECT 1', $query->sql); + + // under PrependLeadingComments the comment attaches to the following query + $queries = iterator_to_array($this->createParser(new PrependLeadingComments())->parseString($content)); + Assert::same(["-- before delimiter\nSELECT 1"], $queries); + } + + /** * @return list}> */ diff --git a/tests/cases/PostgreSqlMultiQueryParserTest.phpt b/tests/cases/PostgreSqlMultiQueryParserTest.phpt index 39f8763..47b1be2 100644 --- a/tests/cases/PostgreSqlMultiQueryParserTest.phpt +++ b/tests/cases/PostgreSqlMultiQueryParserTest.phpt @@ -15,9 +15,9 @@ require_once __DIR__ . '/../inc/MultiQueryParserTestCase.php'; class PostgreSqlMultiQueryParserTest extends MultiQueryParserTestCase { - protected function createParser(): IMultiQueryParser + protected function createParser(?CommentStrategy $commentStrategy = null): IMultiQueryParser { - return new PostgreSqlMultiQueryParser(); + return new PostgreSqlMultiQueryParser($commentStrategy); } diff --git a/tests/cases/SqlServerMultiQueryParserTest.phpt b/tests/cases/SqlServerMultiQueryParserTest.phpt index 293bde9..42cc779 100644 --- a/tests/cases/SqlServerMultiQueryParserTest.phpt +++ b/tests/cases/SqlServerMultiQueryParserTest.phpt @@ -15,9 +15,9 @@ require_once __DIR__ . '/../inc/MultiQueryParserTestCase.php'; class SqlServerMultiQueryParserTest extends MultiQueryParserTestCase { - protected function createParser(): IMultiQueryParser + protected function createParser(?CommentStrategy $commentStrategy = null): IMultiQueryParser { - return new SqlServerMultiQueryParser(); + return new SqlServerMultiQueryParser($commentStrategy); } diff --git a/tests/cases/SqliteMultiQueryParserTest.phpt b/tests/cases/SqliteMultiQueryParserTest.phpt index 0591875..0302d88 100644 --- a/tests/cases/SqliteMultiQueryParserTest.phpt +++ b/tests/cases/SqliteMultiQueryParserTest.phpt @@ -15,9 +15,9 @@ require_once __DIR__ . '/../inc/MultiQueryParserTestCase.php'; class SqliteMultiQueryParserTest extends MultiQueryParserTestCase { - protected function createParser(): IMultiQueryParser + protected function createParser(?CommentStrategy $commentStrategy = null): IMultiQueryParser { - return new SqliteMultiQueryParser(); + return new SqliteMultiQueryParser($commentStrategy); } diff --git a/tests/inc/MultiQueryParserTestCase.php b/tests/inc/MultiQueryParserTestCase.php index afc67b3..8f27e7c 100644 --- a/tests/inc/MultiQueryParserTestCase.php +++ b/tests/inc/MultiQueryParserTestCase.php @@ -2,15 +2,20 @@ namespace Nextras\MultiQueryParser; +use Iterator; use LogicException; use Nextras\MultiQueryParser\Exception\RuntimeException; +use Nextras\MultiQueryParser\Fragment\Comment; +use Nextras\MultiQueryParser\Fragment\Fragment; +use Nextras\MultiQueryParser\Fragment\Query; +use Nextras\MultiQueryParser\Strategy\PrependLeadingComments; use Tester\Assert; use Tester\TestCase; abstract class MultiQueryParserTestCase extends TestCase { - abstract protected function createParser(): IMultiQueryParser; + abstract protected function createParser(?CommentStrategy $commentStrategy = null): IMultiQueryParser; /** @@ -80,6 +85,163 @@ public function testChunkBoundary(array $chunks, array $expectedQueries): void } + /** + * Dialect-agnostic leading-comment cases (line and block comments), shared by every + * parser. Dialect-specific comment styles are tested in the subclasses. + * + * @dataProvider provideCommonPreserveLeadingCommentsData + * @param list $expectedQueries + */ + public function testPreserveLeadingComments(string $content, array $expectedQueries): void + { + $parser = $this->createParser(new PrependLeadingComments()); + $queries = iterator_to_array($parser->parseString($content)); + Assert::same($expectedQueries, $queries); + } + + + /** + * The restructured leading-comment pattern must keep streaming chunk-safe: + * every two-chunk split of the input must reproduce the whole-string result. + */ + public function testPreserveLeadingCommentsChunkBoundary(): void + { + $parser = $this->createParser(new PrependLeadingComments()); + $content = implode("\n", [ + '-- header comment', + '-- second line', + 'SELECT 1;', + '', + 'SELECT 2;', + '/* block ; with semi */', + 'SELECT 3;', + 'SELECT 4; -- trailing', + '-- leading before 5', + 'SELECT 5;', + ]); + + $expected = iterator_to_array($parser->parseString($content)); + $len = strlen($content); + + for ($i = 0; $i <= $len; $i++) { + $chunks = [substr($content, 0, $i), substr($content, $i)]; + $queries = iterator_to_array($parser->parseStringStream(new \ArrayIterator($chunks))); + Assert::same($expected, $queries, "Failed with chunk boundary at offset $i"); + } + } + + + /** + * A comment trailing the last query (with no query following it) must still be emitted as a + * Comment fragment by the parser, so that a custom CommentStrategy can act on it. The bundled + * strategies happen to drop it, which is why this has to be asserted at the fragment level + * rather than via the yielded query strings. + */ + public function testTrailingCommentIsEmittedAsFragment(): void + { + $fragments = $this->collectFragments("SELECT 1;\n-- trailing"); + Assert::count(2, $fragments); + + $query = $fragments[0]; + Assert::type(Query::class, $query); + assert($query instanceof Query); + Assert::same('SELECT 1', $query->sql); + + $comment = $fragments[1]; + Assert::type(Comment::class, $comment); + assert($comment instanceof Comment); + Assert::same('-- trailing', $comment->text); + } + + + /** + * Parses the content and returns the raw Fragment stream the parser emits (before any + * CommentStrategy collapses it), by plugging in a fragment-collecting strategy. + * + * @return list + */ + protected function collectFragments(string $content): array + { + $strategy = new class implements CommentStrategy { + /** @var list */ + public array $fragments = []; + + + public function apply(Iterator $fragments): Iterator + { + foreach ($fragments as $fragment) { + $this->fragments[] = $fragment; + } + + yield from []; + } + }; + + iterator_to_array($this->createParser($strategy)->parseString($content)); + + return $strategy->fragments; + } + + + /** + * @return list}> + */ + protected function provideCommonPreserveLeadingCommentsData(): array + { + return [ + // A single -- comment kept as a prefix of the following query + [ + "-- create the users table\nCREATE TABLE users (id INT);", + ["-- create the users table\nCREATE TABLE users (id INT)"], + ], + // Multiple consecutive -- comment lines + [ + "-- line 1\n-- line 2\nSELECT 1;", + ["-- line 1\n-- line 2\nSELECT 1"], + ], + // Each query keeps only its own leading comment + [ + "-- first\nSELECT 1;\n-- second\nSELECT 2;", + ["-- first\nSELECT 1", "-- second\nSELECT 2"], + ], + // A comment between two queries attaches to the following query + [ + "SELECT 1; -- between\nSELECT 2;", + ["SELECT 1", "-- between\nSELECT 2"], + ], + // /* */ block comments are preserved too + [ + "/* block */ SELECT 1;", + ["/* block */ SELECT 1"], + ], + // Mixed comment types preserve their original formatting + [ + "-- a\n/* b */\nSELECT 1;", + ["-- a\n/* b */\nSELECT 1"], + ], + // Pure leading whitespace / blank lines before the comment are stripped + [ + "\n\n-- spaced\n\nSELECT 1;", + ["-- spaced\n\nSELECT 1"], + ], + // Comment-only input yields nothing (no query to attach to) + ["-- only a comment", []], + ["-- line 1\n-- line 2\n", []], + ["/* only a block */", []], + // A trailing comment after the last query (no following query) is dropped + [ + "SELECT 1;\n-- trailing", + ["SELECT 1"], + ], + // Pure whitespace produces no leading prefix + [ + "\n\nSELECT 1;\n\n", + ["SELECT 1"], + ], + ]; + } + + public function testFile(): void { $parser = $this->createParser();