1use super::error::{ParseWarning, ParseWarningKind};
15
16#[derive(Debug, PartialEq, Eq)]
18pub struct Cst(pub Vec<Node>);
19
20impl Cst {
21 pub fn from_pl_source_code(source: &str) -> (Cst, Vec<ParseWarning>) {
23 parse(source)
24 }
25
26 pub fn display(
28 &self,
29 starting_indent: usize,
30 additional_indent: usize,
31 ) -> impl std::fmt::Display + '_ {
32 DisplaySlice {
33 nodes: &self.0,
34 starting_indent,
35 additional_indent,
36 }
37 }
38}
39
40struct DisplaySlice<'a> {
42 nodes: &'a [Node],
43 starting_indent: usize,
44 additional_indent: usize,
45}
46
47impl<'a> std::fmt::Display for DisplaySlice<'a> {
48 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
49 for node in self.nodes {
50 write!(
51 f,
52 "{}",
53 node.display(self.starting_indent, self.additional_indent)
54 )?;
55 }
56 Ok(())
57 }
58}
59
60struct DisplayNode<'a> {
62 node: &'a Node,
63 starting_indent: usize,
64 additional_indent: usize,
65}
66
67impl Node {
68 pub fn display(
70 &self,
71 starting_indent: usize,
72 additional_indent: usize,
73 ) -> impl std::fmt::Display + '_ {
74 DisplayNode {
75 node: self,
76 starting_indent,
77 additional_indent,
78 }
79 }
80}
81
82impl<'a> std::fmt::Display for DisplayNode<'a> {
83 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
84 let starting_indent = self.starting_indent;
85 let additional_indent = self.additional_indent;
86 match &self.node {
87 Node::Comment(e) => {
89 if e.contains('\n') {
90 write!(f, "{}(COMMENT{e}", " ".repeat(starting_indent))?;
93 writeln!(f, "{})", " ".repeat(starting_indent + additional_indent))?;
94 } else {
95 writeln!(f, "{}(COMMENT {e})", " ".repeat(starting_indent))?;
96 }
97 }
98 Node::Regular(v) => {
99 write!(f, "{}({}", " ".repeat(starting_indent), &v.key)?;
100 if let Some(data) = &v.data {
101 write!(f, " {}", data)?;
102 }
103 if let Some(children) = &v.children {
104 writeln!(f)?;
105 for child in children {
106 write!(
107 f,
108 "{}",
109 child.display(starting_indent + additional_indent, additional_indent)
110 )?;
111 }
112 write!(f, "{}", " ".repeat(starting_indent + additional_indent))?;
113 }
114 writeln!(f, ")")?;
115 }
116 }
117 Ok(())
118 }
119}
120
121#[derive(Debug, PartialEq, Eq)]
123pub enum Node {
124 Comment(String),
126 Regular(RegularNode),
128}
129
130#[derive(Debug, PartialEq, Eq)]
132pub struct RegularNode {
133 pub opening_parenthesis_span: std::ops::Range<usize>,
137 pub key: String,
139 pub key_span: std::ops::Range<usize>,
141 pub data: Option<String>,
148 pub data_span: std::ops::Range<usize>,
150 pub children: Option<Vec<Node>>,
157 pub closing_parenthesis_span: std::ops::Range<usize>,
162}
163
164fn parse(s: &str) -> (Cst, Vec<ParseWarning>) {
165 let mut warnings = vec![];
166 let mut cst = Cst(vec![]);
167 let mut stack: Vec<RegularNode> = vec![];
168 let push = |cst: &mut Cst, stack: &mut Vec<RegularNode>, node: Node| match stack.last_mut() {
169 None => cst.0.push(node),
170 Some(tail) => tail.children.get_or_insert(vec![]).push(node),
171 };
172
173 let mut iter = ParseIter::new(s);
174 while let Some((pos, c)) = iter.peek() {
175 match c {
176 '(' => {
177 iter.next();
178 let (key, key_span) = iter.accumulate_key();
179
180 if key == "COMMENT" {
181 let mut comment = String::new();
183 let mut comment_stack: Vec<usize> = vec![pos];
184 for (u, c) in iter.by_ref() {
185 match c {
186 '(' => {
187 comment_stack.push(u);
188 }
189 ')' => {
190 comment_stack.pop();
191 if comment_stack.is_empty() {
192 break;
193 }
194 }
195 _ => {}
196 }
197 comment.push(c);
198 }
199 while comment_stack.len() > 2 {
204 comment.push(')');
205 comment_stack.pop();
206 }
207 if comment_stack.len() == 2 {
208 comment.push(')');
209 }
210 while let Some(u) = comment_stack.pop() {
211 let pos = iter.next_pos;
212 warnings.push(ParseWarning {
213 span: pos..pos,
214 knuth_pltotf_offset: Some(pos),
215 kind: ParseWarningKind::UnbalancedOpeningParenthesis {
216 opening_parenthesis_span: u..u + 1,
217 },
218 });
219 }
220 push(&mut cst, &mut stack, Node::Comment(comment));
221 } else {
222 iter.trim_whitespace();
223 let (data, data_span) = iter.accumulate_string();
224 let value = RegularNode {
225 opening_parenthesis_span: pos..pos + 1,
226 key,
227 key_span,
228 data: Some(data),
229 data_span,
230 children: Some(vec![]),
231 closing_parenthesis_span: 0..0,
233 };
234 stack.push(value);
235 }
236 }
237 ')' => {
238 iter.next();
239 match stack.pop() {
240 None => warnings.push(ParseWarning {
241 span: pos..pos + 1,
242 knuth_pltotf_offset: Some(pos),
243 kind: ParseWarningKind::UnexpectedClosingParenthesis,
244 }),
245 Some(mut finished) => {
246 finished.closing_parenthesis_span = pos..pos + 1;
247 push(&mut cst, &mut stack, Node::Regular(finished));
248 }
249 }
250 }
251 ' ' | '\n' => {
252 iter.next();
253 continue;
254 }
255 _ => {
256 let (junk, span) = iter.accumulate_string();
257 warnings.push(ParseWarning {
258 span: span.clone(),
259 knuth_pltotf_offset: Some(span.start + 1),
260 kind: ParseWarningKind::JunkInsidePropertyList { junk },
261 })
262 }
263 }
264 }
265 while let Some(mut finished) = stack.pop() {
266 let pos = iter.next_pos;
267 warnings.push(ParseWarning {
268 span: pos..pos,
269 knuth_pltotf_offset: Some(pos),
270 kind: ParseWarningKind::UnbalancedOpeningParenthesis {
271 opening_parenthesis_span: finished.opening_parenthesis_span.clone(),
272 },
273 });
274 finished.closing_parenthesis_span = pos..pos;
275 push(&mut cst, &mut stack, Node::Regular(finished));
276 }
277 warnings.extend(iter.warnings);
278 (cst, warnings)
279}
280
281struct ParseIter<'a> {
282 iter: std::iter::Peekable<super::Chars<'a>>,
283 warnings: Vec<ParseWarning>,
284 next_pos: usize,
285}
286
287impl<'a> Iterator for ParseIter<'a> {
288 type Item = (usize, char);
289
290 fn next(&mut self) -> Option<Self::Item> {
291 self.iter.next().map(|c| {
292 let pos = self.next_pos;
293 self.next_pos += 1;
294 (pos, c)
295 })
296 }
297}
298
299impl<'a> ParseIter<'a> {
300 fn peek(&mut self) -> Option<<Self as Iterator>::Item> {
301 self.iter.peek().copied().map(|c| (self.next_pos, c))
302 }
303 fn new(s: &'a str) -> Self {
304 Self {
305 iter: super::Chars::new(s).peekable(),
306 warnings: vec![],
307 next_pos: 0,
308 }
309 }
310 fn trim_whitespace(&mut self) {
311 while let Some((_, c)) = self.peek() {
312 match c {
313 ' ' | '\n' => {
314 self.next();
315 }
316 _ => break,
317 }
318 }
319 }
320 fn accumulate_key(&mut self) -> (String, std::ops::Range<usize>) {
321 self.accumulate_internal(|c| c.is_alphanumeric() || c == '/' || c == '>')
322 }
323 fn accumulate_string(&mut self) -> (String, std::ops::Range<usize>) {
324 self.accumulate_internal(|c| c != ')' && c != '(')
325 }
326 fn accumulate_internal<F: Fn(char) -> bool>(
327 &mut self,
328 is_allowed_char: F,
329 ) -> (String, std::ops::Range<usize>) {
330 let mut s = String::new();
331 let mut start: Option<usize> = None;
332 while let Some((pos, c)) = self.peek() {
333 if is_allowed_char(c) {
334 self.next();
335 s.push(c);
336 start.get_or_insert(pos);
337 } else {
338 break;
339 }
340 }
341 let start = start.unwrap_or(self.next_pos);
342 (s, start..self.next_pos)
343 }
344}
345
346#[cfg(test)]
347mod tests {
348 use super::*;
349
350 fn run(source: &str, want: Vec<Node>, want_errors: Vec<ParseWarning>) {
351 let (got, got_errors) = Cst::from_pl_source_code(source);
352 assert_eq!(got, Cst(want));
353 assert_eq!(got_errors, want_errors);
354 }
355
356 macro_rules! cst_test {
357 ( $( ($name: ident, $input: expr, $want: expr, $want_errors: expr, ), )+ ) => {
358 $(
359 #[test]
360 fn $name() {
361 let input = $input;
362 let want = $want;
363 let want_errors = $want_errors;
364 run(input, want, want_errors);
365 }
366 )+
367 };
368 }
369
370 cst_test!(
371 (
372 basic,
373 " (Hello World (Nested One) (Nested Two Two) (Nested Three\nThree))",
374 vec![Node::Regular(RegularNode {
375 opening_parenthesis_span: 1..2,
376 key: "Hello".into(),
377 key_span: 2..7,
378 data: Some("World ".into()),
379 data_span: 8..14,
380 children: vec![
381 Node::Regular(RegularNode {
382 opening_parenthesis_span: 14..15,
383 key: "Nested".into(),
384 key_span: 15..21,
385 data: Some("One".into()),
386 data_span: 22..25,
387 children: vec![].into(),
388 closing_parenthesis_span: 25..26,
389 }),
390 Node::Regular(RegularNode {
391 opening_parenthesis_span: 27..28,
392 key: "Nested".into(),
393 key_span: 28..34,
394 data: Some("Two Two".into()),
395 data_span: 36..43,
396 children: vec![].into(),
397 closing_parenthesis_span: 43..44,
398 }),
399 Node::Regular(RegularNode {
400 opening_parenthesis_span: 45..46,
401 key: "Nested".into(),
402 key_span: 46..52,
403 data: Some("Three\nThree".into()),
404 data_span: 53..64,
405 children: vec![].into(),
406 closing_parenthesis_span: 64..65,
407 }),
408 ]
409 .into(),
410 closing_parenthesis_span: 65..66,
411 }),],
412 vec![],
413 ),
414 (
415 basic_never_ends,
416 "(Hello",
417 vec![Node::Regular(RegularNode {
418 opening_parenthesis_span: 0..1,
419 key: "Hello".into(),
420 key_span: 1..6,
421 data: Some("".into()),
422 data_span: 6..6,
423 children: vec![].into(),
424 closing_parenthesis_span: 6..6,
425 }),],
426 vec![ParseWarning {
427 span: 6..6,
428 knuth_pltotf_offset: Some(6),
429 kind: ParseWarningKind::UnbalancedOpeningParenthesis {
430 opening_parenthesis_span: 0..1,
431 },
432 }],
433 ),
434 (
435 basic_never_ends_2,
436 "(",
437 vec![Node::Regular(RegularNode {
438 opening_parenthesis_span: 0..1,
439 key: "".into(),
440 key_span: 1..1,
441 data: Some("".into()),
442 data_span: 1..1,
443 children: vec![].into(),
444 closing_parenthesis_span: 1..1,
445 }),],
446 vec![ParseWarning {
447 span: 1..1,
448 knuth_pltotf_offset: Some(1),
449 kind: ParseWarningKind::UnbalancedOpeningParenthesis {
450 opening_parenthesis_span: 0..1,
451 },
452 }],
453 ),
454 (
455 empty_node,
456 "()",
457 vec![Node::Regular(RegularNode {
458 opening_parenthesis_span: 0..1,
459 key: "".into(),
460 key_span: 1..1,
461 data: Some("".into()),
462 data_span: 1..1,
463 children: vec![].into(),
464 closing_parenthesis_span: 1..2,
465 }),],
466 vec![],
467 ),
468 (
469 crlf,
470 "(CRLF\r\nGOOD\r\r\nVALUE\r\n)",
471 vec![Node::Regular(RegularNode {
472 opening_parenthesis_span: 0..1,
473 key: "CRLF".into(),
474 key_span: 1..5,
475 data: Some("GOOD\nVALUE\n".into()),
476 data_span: 6..17,
477 children: vec![].into(),
478 closing_parenthesis_span: 17..18,
479 }),],
480 vec![],
481 ),
482 (
483 garbage_string_in_list,
484 "(Hello World) Garbage String (Hola Mundo)",
485 vec![
486 Node::Regular(RegularNode {
487 opening_parenthesis_span: 0..1,
488 key: "Hello".into(),
489 key_span: 1..6,
490 data: Some("World".into()),
491 data_span: 7..12,
492 children: vec![].into(),
493 closing_parenthesis_span: 12..13,
494 }),
495 Node::Regular(RegularNode {
496 opening_parenthesis_span: 29..30,
497 key: "Hola".into(),
498 key_span: 30..34,
499 data: Some("Mundo".into()),
500 data_span: 35..40,
501 children: vec![].into(),
502 closing_parenthesis_span: 40..41,
503 }),
504 ],
505 vec![ParseWarning {
506 span: 14..29,
507 knuth_pltotf_offset: Some(15),
508 kind: ParseWarningKind::JunkInsidePropertyList {
509 junk: "Garbage String ".into(),
510 },
511 }],
512 ),
513 (
514 garbage_closed_parenthesis_in_list,
515 "(Hello World) ) (Hola Mundo)",
516 vec![
517 Node::Regular(RegularNode {
518 opening_parenthesis_span: 0..1,
519 key: "Hello".into(),
520 key_span: 1..6,
521 data: Some("World".into()),
522 data_span: 7..12,
523 children: vec![].into(),
524 closing_parenthesis_span: 12..13,
525 }),
526 Node::Regular(RegularNode {
527 opening_parenthesis_span: 16..17,
528 key: "Hola".into(),
529 key_span: 17..21,
530 data: Some("Mundo".into()),
531 data_span: 22..27,
532 children: vec![].into(),
533 closing_parenthesis_span: 27..28,
534 }),
535 ],
536 vec![ParseWarning {
537 span: 14..15,
538 knuth_pltotf_offset: Some(14),
539 kind: ParseWarningKind::UnexpectedClosingParenthesis,
540 }],
541 ),
542 (
543 comment,
544 "(COMMENT World (Nested One) Hello (Nested Two))",
545 vec![Node::Comment(
546 " World (Nested One) Hello (Nested Two)".into()
547 )],
548 vec![],
549 ),
550 (
551 comment_with_newlines,
552 "(COMMENT World (Nested\nOne) Hello (Nested\nTwo))",
553 vec![Node::Comment(
554 " World (Nested\nOne) Hello (Nested\nTwo)".into()
555 ),],
556 vec![],
557 ),
558 (
559 comment_trailing_space,
560 "(COMMENT World )",
561 vec![Node::Comment(" World ".into()),],
562 vec![],
563 ),
564 (
565 comment_never_ends,
566 "(COMMENT World (",
567 vec![Node::Comment(" World ()".into())],
568 vec![
569 ParseWarning {
570 span: 16..16,
571 knuth_pltotf_offset: Some(16),
572 kind: ParseWarningKind::UnbalancedOpeningParenthesis {
573 opening_parenthesis_span: 15..16,
574 }
575 },
576 ParseWarning {
577 span: 16..16,
578 knuth_pltotf_offset: Some(16),
579 kind: ParseWarningKind::UnbalancedOpeningParenthesis {
580 opening_parenthesis_span: 0..1,
581 }
582 },
583 ],
584 ),
585 (
586 non_visible_ascii_char,
587 "(Hello Worldä)",
588 vec![Node::Regular(RegularNode {
589 opening_parenthesis_span: 0..1,
590 key: "Hello".into(),
591 key_span: 1..6,
592 data: Some("Worldä".into()),
593 data_span: 7..13,
594 children: vec![].into(),
595 closing_parenthesis_span: 13..14,
596 }),],
597 vec![],
598 ),
599 (
600 trailing_space,
601 "(Hello World )",
602 vec![Node::Regular(RegularNode {
603 opening_parenthesis_span: 0..1,
604 key: "Hello".into(),
605 key_span: 1..6,
606 data: Some("World ".into()),
607 data_span: 7..13,
608 children: vec![].into(),
609 closing_parenthesis_span: 13..14,
610 }),],
611 vec![],
612 ),
613 );
614}