|
2 | 2 |
|
3 | 3 | namespace Phiki\Adapters\CommonMark\Transformers; |
4 | 4 |
|
| 5 | +use Phiki\Adapters\CommonMark\Transformers\Annotations\Annotation; |
| 6 | +use Phiki\Adapters\CommonMark\Transformers\Annotations\AnnotationRange; |
| 7 | +use Phiki\Adapters\CommonMark\Transformers\Annotations\AnnotationRangeKind; |
| 8 | +use Phiki\Adapters\CommonMark\Transformers\Annotations\AnnotationType; |
5 | 9 | use Phiki\Contracts\RequiresGrammarInterface; |
| 10 | +use Phiki\Grammar\Grammar; |
| 11 | +use Phiki\Phast\Element; |
| 12 | +use Phiki\Support\Arr; |
6 | 13 | use Phiki\Transformers\AbstractTransformer; |
7 | 14 | use Phiki\Transformers\Concerns\RequiresGrammar; |
8 | 15 |
|
class AnnotationsTransformer extends AbstractTransformer implements RequiresGrammarInterface
{
    use RequiresGrammar;

    /**
     * sprintf() template for the annotation marker, e.g. `[code! highlight:start]`.
     *
     * The first placeholder receives the quoted prefix, the second the alternation
     * of every accepted keyword. The optional `range` group captures whatever
     * follows the colon.
     */
    public const ANNOTATION_REGEX = '/\[%s! (?<keyword>%s)(:(?<range>.+))?\]/';

    /**
     * sprintf() template matching a line-comment opener left dangling at the end of a line.
     */
    public const DANGLING_LINE_COMMENT_REGEX = '/(%s)\s*$/';

    /**
     * Comment delimiters that are checked regardless of the grammar.
     *
     * Strings are line-comment openers; two-element arrays are [open, close] block-comment pairs.
     */
    public const COMMON_COMMENT_CHARACTERS = [
        '#', '//', ['/*', '*/'], ['/**', '*/'],
    ];

    /**
     * Additional comment delimiters, keyed by grammar.
     *
     * Uses the same convention as COMMON_COMMENT_CHARACTERS: a string is a line-comment
     * opener, a two-element array is an [open, close] block-comment pair.
     */
    public const GRAMMAR_SPECIFIC_COMMENT_CHARACTERS = [
        Grammar::Antlers->value => ['{{#', '#}}'],
        Grammar::Blade->value => ['{{--', '--}}'],
        Grammar::Coq->value => ['(*', '*)'],
        Grammar::Asm->value => ';',
        Grammar::Html->value => ['<!--', '-->'],
        Grammar::Xml->value => ['<!--', '-->'],
        // A single-element array would be treated as a block pair with a missing closer,
        // so the line-comment opener has to be a plain string.
        Grammar::Ini->value => ';',
    ];

    /**
     * The collected list of annotations, keyed by the line index they apply to.
     *
     * @var array<int, array<Annotation>>
     */
    protected array $annotations = [];

    /**
     * Create a new instance.
     *
     * @param  string  $prefix  The prefix used to denote annotations, e.g. `code` for `[code! highlight]`.
     */
    public function __construct(protected string $prefix = 'code') {}

    /**
     * Preprocess the code block content to discover annotations.
     *
     * Every annotation marker is stripped from the code (together with the comment
     * delimiters that wrapped it), lines that become empty are dropped, and the
     * discovered annotations are stored per line index for `pre()` and `line()`.
     *
     * @param  string  $code  The raw code block content.
     * @return string The code with all annotation markers removed, joined with "\n".
     */
    public function preprocess(string $code): string
    {
        $lines = preg_split('/\R/', $code);

        // Nothing sensible can be done if the split itself failed; leave the code untouched.
        if ($lines === false) {
            $this->annotations = [];

            return $code;
        }

        $annotationRegex = $this->buildAnnotationRegex();
        $annotations = [];
        $unclosedAnnotationsStack = [];

        // The comment regexes are identical for every line, but they are only built once the
        // first annotation is found so that the grammar is never read for annotation-free code.
        $lineCommentRegex = null;
        $blockCommentRegex = null;

        // `count($lines)` is deliberately re-evaluated: lines may be removed while iterating.
        for ($i = 0; $i < count($lines); $i++) {
            $line = $lines[$i];

            // Anything other than exactly one match (no match, or a PCRE error) means there is nothing to do.
            if (preg_match($annotationRegex, $line, $matches, PREG_UNMATCHED_AS_NULL | PREG_OFFSET_CAPTURE) !== 1) {
                continue;
            }

            $type = AnnotationType::fromKeyword($matches['keyword'][0]);
            $annotation = null;
            $unclosed = false;

            if ($matches['range'][0] === null) {
                // No range was given, so the annotation only applies to the current line.
                $annotation = new Annotation($type, $i, $i);
            } else {
                $range = AnnotationRange::parse($matches['range'][0], $i);

                // Invalid range provided, skip and move on.
                if (! $range) {
                    continue;
                }

                if ($range->kind === AnnotationRangeKind::OpenEnded) {
                    // Open-ended ranges are parked on the stack until a matching end marker shows up.
                    $unclosed = true;
                    $unclosedAnnotationsStack[] = $annotation = new Annotation($type, $i, $i);
                } elseif ($range->kind === AnnotationRangeKind::End) {
                    // Close the most recently opened annotation of the same type, if there is one.
                    // NOTE(review): if this marker sits on a line of its own, that line is removed below
                    // and index `$i` then refers to the following line — confirm the range should include it.
                    for ($j = count($unclosedAnnotationsStack) - 1; $j >= 0; $j--) {
                        if ($unclosedAnnotationsStack[$j]->type === $type) {
                            $annotation = array_splice($unclosedAnnotationsStack, $j, 1)[0];
                            $annotation->end = $i;
                            break;
                        }
                    }
                } else {
                    // Otherwise, we have a closed range so we can construct the annotation directly.
                    $annotation = new Annotation($type, $range->start, $range->end);
                }
            }

            if ($lineCommentRegex === null) {
                [$lineCommentRegex, $blockCommentRegex] = $this->buildCommentRegexes();
            }

            $stripped = $this->stripAnnotationFromLine(
                $line,
                $matches[0][0],
                $matches[0][1],
                $lineCommentRegex,
                $blockCommentRegex,
            );

            if (trim($stripped) === '') {
                // The line only held the annotation: remove it and reindex, then step back
                // so the line that slid into this slot is inspected on the next iteration.
                array_splice($lines, $i, 1);
                $i--;
            } else {
                $lines[$i] = $stripped;
            }

            // Unclosed annotations are registered once they are closed (or at the end of the document).
            // An end marker without a matching opener produces no annotation at all; the marker is
            // still stripped from the output, but there is nothing to register.
            if ($unclosed || $annotation === null) {
                continue;
            }

            $annotations = $this->registerAnnotation($annotations, $annotation);
        }

        // Any annotations left in the unclosed stack are closed at the end of the document.
        foreach ($unclosedAnnotationsStack as $unclosedAnnotation) {
            $unclosedAnnotation->end = count($lines) - 1;
            $annotations = $this->registerAnnotation($annotations, $unclosedAnnotation);
        }

        $this->annotations = $annotations;

        return implode("\n", $lines);
    }

    /**
     * Apply every collected annotation to the `<pre>` element.
     *
     * An annotation spanning several lines is stored once per line, so it is applied once per line here.
     */
    public function pre(Element $pre): Element
    {
        if ($this->annotations === []) {
            return $pre;
        }

        foreach ($this->annotations as $annotations) {
            foreach ($annotations as $annotation) {
                $annotation->applyToPre($pre);
            }
        }

        return $pre;
    }

    /**
     * Apply the annotations registered for the given line index to that line's element.
     */
    public function line(Element $span, array $tokens, int $index): Element
    {
        if ($this->annotations === [] || ! isset($this->annotations[$index])) {
            return $span;
        }

        foreach ($this->annotations[$index] as $annotation) {
            $annotation->applyToLine($span);
        }

        return $span;
    }

    /**
     * Build the annotation regex from the configured prefix and every keyword of every annotation type.
     */
    private function buildAnnotationRegex(): string
    {
        $keywords = array_merge(
            ...array_map(fn (AnnotationType $type) => $type->keywords(), AnnotationType::cases()),
        );

        return sprintf(
            self::ANNOTATION_REGEX,
            preg_quote($this->prefix, '/'),
            implode('|', array_map(fn (string $keyword) => preg_quote($keyword, '/'), $keywords)),
        );
    }

    /**
     * Build the regexes that detect an empty comment left at the end of a line.
     *
     * @return array{0: string, 1: string} The line-comment regex followed by the block-comment regex.
     */
    private function buildCommentRegexes(): array
    {
        $commentChars = self::COMMON_COMMENT_CHARACTERS;
        $grammarSpecific = self::GRAMMAR_SPECIFIC_COMMENT_CHARACTERS[$this->grammar->name] ?? null;

        // Some grammars have their own comment characters, e.g. Blade, Antlers, Coq, etc.
        if ($grammarSpecific !== null) {
            $commentChars[] = $grammarSpecific;
        }

        // Strings are line-comment openers, arrays are [open, close] block-comment pairs.
        [$lineOpeners, $blockPairs] = Arr::partition($commentChars, fn (string|array $chars) => is_string($chars));

        $lineCommentRegex = sprintf(
            self::DANGLING_LINE_COMMENT_REGEX,
            implode('|', array_map(fn (string $chars) => preg_quote($chars, '/'), $lineOpeners)),
        );

        // The alternation is grouped so that `$` anchors every alternative, not just the last one.
        // Without the group, an empty block comment in the middle of a line would match as well.
        $blockCommentRegex = sprintf(
            '/(?:%s)$/',
            implode('|', array_map(
                fn (array $pair) => sprintf('%s\s*%s', preg_quote($pair[0], '/'), preg_quote($pair[1], '/')),
                $blockPairs,
            )),
        );

        return [$lineCommentRegex, $blockCommentRegex];
    }

    /**
     * Remove the annotation marker from a line, along with the comment delimiters that wrapped it.
     *
     * @param  string  $line  The original line.
     * @param  string  $marker  The full annotation text that was matched.
     * @param  int  $markerOffset  The byte offset of the marker within the original line.
     * @param  string  $lineCommentRegex  Regex matching a dangling line-comment opener.
     * @param  string  $blockCommentRegex  Regex matching an empty block comment at the end of the line.
     * @return string The line cut off right before the now-empty comment, or at the marker position
     *                when no comment delimiters are found.
     */
    private function stripAnnotationFromLine(
        string $line,
        string $marker,
        int $markerOffset,
        string $lineCommentRegex,
        string $blockCommentRegex,
    ): string {
        // Work on a copy without the marker and without trailing whitespace.
        $trimmed = rtrim(str_replace($marker, '', $line));

        if (preg_match($lineCommentRegex, $trimmed, $found, PREG_OFFSET_CAPTURE) === 1) {
            // Cut right before the dangling line-comment opener.
            $cutoffPoint = $found[1][1];
        } elseif (preg_match($blockCommentRegex, $trimmed, $found, PREG_OFFSET_CAPTURE) === 1) {
            // Cut right before the empty block comment.
            $cutoffPoint = $found[0][1];
        } else {
            // No comment characters were found, so cut off at the annotation itself.
            $cutoffPoint = $markerOffset;
        }

        return substr($trimmed, 0, $cutoffPoint);
    }

    /**
     * Add an annotation to every line index it covers.
     *
     * @param  array<int, array<Annotation>>  $annotations
     * @return array<int, array<Annotation>>
     */
    private function registerAnnotation(array $annotations, Annotation $annotation): array
    {
        for ($k = $annotation->start; $k <= $annotation->end; $k++) {
            $annotations[$k][] = $annotation;
        }

        return $annotations;
    }
}
0 commit comments