1pub mod lexer;
4pub mod trace;
5use crate::types::CatCode;
6use std::{fmt::Display, num};
7use texcraft_stdext::collections::interner;
8
/// Compact identifier for a control sequence name.
///
/// The identifier wraps a `NonZeroU32`, so zero is never a valid name and
/// values above `u32::MAX` cannot be represented.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct CsName(num::NonZeroU32);

impl CsName {
    /// Returns the wrapped integer as a `usize`.
    #[inline]
    pub fn to_usize(&self) -> usize {
        let raw: u32 = self.0.get();
        raw as usize
    }

    /// Builds a name from a raw integer.
    ///
    /// Returns `None` when `u` is zero or does not fit in a `u32`.
    pub fn try_from_usize(u: usize) -> Option<CsName> {
        u32::try_from(u)
            .ok()
            .and_then(num::NonZeroU32::new)
            .map(CsName)
    }
}
31
/// Interner whose keys are [`CsName`] values.
pub type CsNameInterner = interner::Interner<CsName>;
34
35impl interner::Key for CsName {
36 fn try_from_usize(index: usize) -> Option<Self> {
37 num::NonZeroU32::try_from_usize(index).map(CsName)
38 }
39
40 fn into_usize(self) -> usize {
41 self.0.into_usize()
42 }
43}
44
/// The value carried by a token.
///
/// The first ten variants each hold a character; [`Value::new`] selects the
/// variant from the category code the character was read with. The last
/// variant holds a reference to a command instead.
#[derive(Debug, Eq, PartialEq, Clone, Copy, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Value {
    BeginGroup(char),
    EndGroup(char),
    MathShift(char),
    AlignmentTab(char),
    Parameter(char),
    Superscript(char),
    Subscript(char),
    Space(char),
    Letter(char),
    Other(char),
    /// A control sequence or an active character; see [`CommandRef`].
    CommandRef(CommandRef),
}
61
/// Reference to a command, made either by name or by a single character.
#[derive(Debug, Eq, PartialEq, Clone, Copy, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum CommandRef {
    /// A control sequence, identified by its interned name.
    ControlSequence(CsName),
    /// An active character; this is what [`Value::new`] produces for `CatCode::Active`.
    ActiveCharacter(char),
}
69
70impl CommandRef {
71 pub fn to_string(&self, cs_name_interner: &CsNameInterner) -> String {
72 match self {
73 CommandRef::ControlSequence(cs_name) => {
74 format!("\\{}", cs_name_interner.resolve(*cs_name).unwrap())
75 }
76 CommandRef::ActiveCharacter(c) => format!("{c}"),
77 }
78 }
79}
80
81impl Value {
82 pub fn new(c: char, cat_code: CatCode) -> Value {
83 match cat_code {
84 CatCode::BeginGroup => Value::BeginGroup(c),
85 CatCode::EndGroup => Value::EndGroup(c),
86 CatCode::MathShift => Value::MathShift(c),
87 CatCode::AlignmentTab => Value::AlignmentTab(c),
88 CatCode::Parameter => Value::Parameter(c),
89 CatCode::Superscript => Value::Superscript(c),
90 CatCode::Subscript => Value::Subscript(c),
91 CatCode::Space => Value::Space(c),
92 CatCode::Letter => Value::Letter(c),
93 CatCode::Other => Value::Other(c),
94 CatCode::Active => Value::CommandRef(CommandRef::ActiveCharacter(c)),
95 _ => panic!("raw cat code not allowed"),
96 }
97 }
98
99 pub fn char(&self) -> Option<char> {
101 match *self {
102 Value::BeginGroup(c) => Some(c),
103 Value::EndGroup(c) => Some(c),
104 Value::MathShift(c) => Some(c),
105 Value::AlignmentTab(c) => Some(c),
106 Value::Parameter(c) => Some(c),
107 Value::Superscript(c) => Some(c),
108 Value::Subscript(c) => Some(c),
109 Value::Space(c) => Some(c),
110 Value::Letter(c) => Some(c),
111 Value::Other(c) => Some(c),
112 Value::CommandRef(command_ref) => match command_ref {
113 CommandRef::ControlSequence(_) => None,
114 CommandRef::ActiveCharacter(c) => Some(c),
115 },
116 }
117 }
118
119 pub fn cat_code(&self) -> Option<CatCode> {
120 match self {
121 Value::BeginGroup(_) => Some(CatCode::BeginGroup),
122 Value::EndGroup(_) => Some(CatCode::EndGroup),
123 Value::MathShift(_) => Some(CatCode::MathShift),
124 Value::AlignmentTab(_) => Some(CatCode::AlignmentTab),
125 Value::Parameter(_) => Some(CatCode::Parameter),
126 Value::Superscript(_) => Some(CatCode::Superscript),
127 Value::Subscript(_) => Some(CatCode::Subscript),
128 Value::Space(_) => Some(CatCode::Space),
129 Value::Letter(_) => Some(CatCode::Letter),
130 Value::Other(_) => Some(CatCode::Other),
131 Value::CommandRef(command_ref) => match command_ref {
132 CommandRef::ControlSequence(_) => None,
133 CommandRef::ActiveCharacter(_) => Some(CatCode::Active),
134 },
135 }
136 }
137}
138
/// A token: a [`Value`] paired with a [`trace::Key`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Token {
    /// What the token is: a categorised character or a command reference.
    value: Value,
    /// Key from the `trace` module attached to this token.
    // NOTE(review): presumably identifies where the token came from — confirm in `trace`.
    trace_key: trace::Key,
}
146
147impl std::fmt::Display for Token {
148 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
149 match &self.value {
150 Value::CommandRef(_) => {
151 write![f, "todo"] }
153 _ => {
154 write![f, "{}", self.char().unwrap()]
155 }
156 }
157 }
158}
159
// Generates a public `Token` constructor named `$ctor`.
//
// `$wrap` is called with the character to produce the token's `Value`, so it
// is normally a tuple-variant path such as `Value::Letter`. The macro is
// meant to be invoked inside `impl Token`.
macro_rules! token_constructor {
    ($ctor: ident, $wrap: expr) => {
        /// Creates a token of this kind from a character and a trace key.
        pub fn $ctor(c: char, trace_key: trace::Key) -> Token {
            let value = $wrap(c);
            Token { value, trace_key }
        }
    };
}
170
171impl Token {
172 token_constructor!(new_begin_group, Value::BeginGroup);
173 token_constructor!(new_end_group, Value::EndGroup);
174 token_constructor!(new_math_shift, Value::MathShift);
175 token_constructor!(new_alignment_tab, Value::AlignmentTab);
176 token_constructor!(new_parameter, Value::Parameter);
177 token_constructor!(new_superscript, Value::Superscript);
178 token_constructor!(new_subscript, Value::Subscript);
179 token_constructor!(new_space, Value::Space);
180 token_constructor!(new_letter, Value::Letter);
181 token_constructor!(new_other, Value::Other);
182
183 pub fn new_command_ref(command_ref: CommandRef, trace_key: trace::Key) -> Token {
184 Token {
185 value: Value::CommandRef(command_ref),
186 trace_key,
187 }
188 }
189
190 pub fn new_active_character(c: char, trace_key: trace::Key) -> Token {
191 Token {
192 value: Value::CommandRef(CommandRef::ActiveCharacter(c)),
193 trace_key,
194 }
195 }
196
197 pub fn new_control_sequence(name: CsName, trace_key: trace::Key) -> Token {
198 Token {
199 value: Value::CommandRef(CommandRef::ControlSequence(name)),
200 trace_key,
201 }
202 }
203
204 pub fn new_from_value(value: Value, trace_key: trace::Key) -> Token {
205 Token { value, trace_key }
206 }
207
208 #[inline]
209 pub fn value(&self) -> Value {
210 self.value
211 }
212
213 #[inline]
214 pub fn trace_key(&self) -> trace::Key {
215 self.trace_key
216 }
217
218 pub fn char(&self) -> Option<char> {
220 self.value.char()
221 }
222
223 pub fn cat_code(&self) -> Option<CatCode> {
224 self.value.cat_code()
225 }
226}
227
/// Whitespace that has been requested but not yet written.
///
/// The `Display` impl renders the pending whitespace as text.
#[derive(Default)]
enum PendingWhitespace {
    /// Initial state. Requests for spaces, newlines and paragraphs leave it
    /// unchanged; only `reset` moves out of it.
    #[default]
    NotStarted,
    /// Nothing is pending.
    None,
    /// A single space is pending.
    Space,
    /// This many line breaks are pending.
    Newlines(usize),
}

impl PendingWhitespace {
    /// Discards anything pending and leaves the initial state.
    fn reset(&mut self) {
        *self = PendingWhitespace::None;
    }

    /// Requests a space. This only has an effect when nothing is pending.
    fn add_space(&mut self) {
        if matches!(*self, PendingWhitespace::None) {
            *self = PendingWhitespace::Space;
        }
    }

    /// Requests one more line break. A pending space is replaced by the
    /// line break rather than kept alongside it.
    fn add_newline(&mut self) {
        match self {
            PendingWhitespace::NotStarted => {}
            PendingWhitespace::Newlines(count) => *count += 1,
            PendingWhitespace::None | PendingWhitespace::Space => {
                *self = PendingWhitespace::Newlines(1)
            }
        }
    }

    /// Requests a paragraph break: nothing, a space, or a single line break
    /// becomes two line breaks. Any other state is left as it is.
    fn new_paragraph(&mut self) {
        let needs_blank_line = matches!(
            *self,
            PendingWhitespace::None | PendingWhitespace::Space | PendingWhitespace::Newlines(1)
        );
        if needs_blank_line {
            *self = PendingWhitespace::Newlines(2);
        }
    }
}

impl Display for PendingWhitespace {
    /// Writes the pending whitespace: nothing, one space, or the pending
    /// number of line breaks.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match *self {
            PendingWhitespace::Newlines(count) => (0..count).try_for_each(|_| writeln!(f)),
            PendingWhitespace::Space => write!(f, " "),
            PendingWhitespace::NotStarted | PendingWhitespace::None => Ok(()),
        }
    }
}
287
/// Writes token values to an output stream while collapsing whitespace.
///
/// Whitespace is not written straight away: it is recorded in
/// `pending_whitespace` and emitted in front of the next non-space value.
#[derive(Default)]
pub struct Writer {
    /// Whitespace requested since the last value was written.
    pending_whitespace: PendingWhitespace,
}
293
294impl Writer {
295 pub fn write(
297 &mut self,
298 io_writer: &mut dyn std::io::Write,
299 interner: &CsNameInterner,
300 value: Value,
301 ) -> Result<(), std::io::Error> {
302 match &value {
303 Value::CommandRef(CommandRef::ControlSequence(s)) => {
304 write!(
305 io_writer,
306 "{}\\{}",
307 self.pending_whitespace,
308 interner.resolve(*s).unwrap()
309 )?;
310 self.pending_whitespace.reset();
311 }
312 Value::Space(_) => self.pending_whitespace.add_space(),
313 _ => {
314 write!(
315 io_writer,
316 "{}{}",
317 self.pending_whitespace,
318 value.char().unwrap()
319 )?;
320 self.pending_whitespace.reset();
321 }
322 }
323 io_writer.flush()
324 }
325
326 pub fn add_newline(&mut self) {
327 self.pending_whitespace.add_newline();
328 }
329
330 pub fn start_paragraph(&mut self) {
331 self.pending_whitespace.new_paragraph();
332 }
333}
334
335pub fn write_tokens<'a, T>(tokens: T, interner: &CsNameInterner) -> String
337where
338 T: IntoIterator<Item = &'a Token>,
339{
340 let mut buffer: Vec<u8> = Default::default();
341 let mut writer: Writer = Default::default();
342 for token in tokens.into_iter() {
343 writer.write(&mut buffer, interner, token.value()).unwrap();
344 }
345 std::str::from_utf8(&buffer).unwrap().into()
346}
347
348pub fn write_token_values<'a, T>(values: T, interner: &CsNameInterner) -> String
350where
351 T: IntoIterator<Item = &'a Value>,
352{
353 let mut buffer: Vec<u8> = Default::default();
354 let mut writer: Writer = Default::default();
355 for value in values.into_iter() {
356 writer.write(&mut buffer, interner, *value).unwrap();
357 }
358 std::str::from_utf8(&buffer).unwrap().into()
359}
360
#[cfg(test)]
mod tests {
    use super::*;

    // One step of a writer test: either a value to write or a call on the writer.
    enum Instruction {
        // Write a control sequence with this name (interned by the test harness).
        ControlSequence(&'static str),
        // Write a character with the given category code.
        Character(char, CatCode),
        // Call `Writer::add_newline`.
        Newline,
        // Call `Writer::start_paragraph`.
        NewParagraph,
    }

    // Runs the instructions against a fresh `Writer` and checks that the
    // bytes written equal `want`. On mismatch both strings are printed
    // before panicking.
    fn writer_test(input: Vec<Instruction>, want: &str) {
        let mut buffer: Vec<u8> = Default::default();
        let mut writer: Writer = Default::default();
        let mut interner = CsNameInterner::default();
        for pre_interned_token in input {
            match pre_interned_token {
                Instruction::ControlSequence(name) => {
                    let cs_name = interner.get_or_intern(name);
                    let token = Token::new_control_sequence(cs_name, trace::Key::dummy());
                    writer.write(&mut buffer, &interner, token.value()).unwrap();
                }
                Instruction::Character(c, code) => {
                    let token = Token::new_from_value(Value::new(c, code), trace::Key::dummy());
                    writer.write(&mut buffer, &interner, token.value()).unwrap();
                }
                Instruction::Newline => {
                    writer.add_newline();
                }
                Instruction::NewParagraph => {
                    writer.start_paragraph();
                }
            };
        }
        let got: String = std::str::from_utf8(&buffer).unwrap().into();
        let want = want.to_string();

        if got != want {
            println!("Output is different:");
            println!("------[got]-------");
            println!("{}", got);
            println!("------[want]------");
            println!("{}", want);
            println!("-----------------");
            panic!("write_tokens test failed");
        }
    }

    // Expands each `(name, input, want)` triple into a `#[test]` function
    // that calls `writer_test(input, want)`.
    macro_rules! write_tokens_tests {
        ($( ($name: ident, $input: expr, $want: expr), )+) => {
            $(
                #[test]
                fn $name() {
                    writer_test($input, $want);
                }
            )+
        };
    }

    write_tokens_tests!(
        (blank, vec!(), ""),
        // Space values before the first visible value are dropped.
        (
            trim_whitespace_from_start,
            vec![
                Instruction::Character('\n', CatCode::Space),
                Instruction::Character('\n', CatCode::Space),
                Instruction::Character('\n', CatCode::Space),
                Instruction::Character('H', CatCode::Letter),
            ],
            "H"
        ),
        // Space values after the last visible value are never written.
        (
            trim_whitespace_from_end,
            vec![
                Instruction::Character('H', CatCode::Letter),
                Instruction::Character('\n', CatCode::Space),
                Instruction::Character('\n', CatCode::Space),
                Instruction::Character('\n', CatCode::Space),
            ],
            "H"
        ),
        // Runs of space values collapse to one space, whatever character they carry.
        (
            trim_whitespace_from_middle_1,
            vec![
                Instruction::Character('H', CatCode::Letter),
                Instruction::Character(' ', CatCode::Space),
                Instruction::Character(' ', CatCode::Space),
                Instruction::Character('W', CatCode::Letter),
            ],
            "H W"
        ),
        (
            trim_whitespace_from_middle_2,
            vec![
                Instruction::Character('H', CatCode::Letter),
                Instruction::Character('\n', CatCode::Space),
                Instruction::Character(' ', CatCode::Space),
                Instruction::Character('\n', CatCode::Space),
                Instruction::Character('W', CatCode::Letter),
            ],
            "H W"
        ),
        (
            trim_whitespace_from_middle_3,
            vec![
                Instruction::Character('H', CatCode::Letter),
                Instruction::Character('\n', CatCode::Space),
                Instruction::Character('\n', CatCode::Space),
                Instruction::Character('\n', CatCode::Space),
                Instruction::Character('W', CatCode::Letter),
            ],
            "H W"
        ),
        (
            control_sequence,
            vec![Instruction::ControlSequence("HelloWorld"),],
            "\\HelloWorld"
        ),
        // Explicit newlines accumulate; space values in between do not reset the count.
        (
            newline_1,
            vec![
                Instruction::Character('H', CatCode::Letter),
                Instruction::Newline,
                Instruction::Character('W', CatCode::Letter),
            ],
            "H\nW"
        ),
        (
            newline_2,
            vec![
                Instruction::Character('H', CatCode::Letter),
                Instruction::Newline,
                Instruction::Character(' ', CatCode::Space),
                Instruction::Newline,
                Instruction::Character('W', CatCode::Letter),
            ],
            "H\n\nW"
        ),
        (
            newline_3,
            vec![
                Instruction::Character('H', CatCode::Letter),
                Instruction::Newline,
                Instruction::Character(' ', CatCode::Space),
                Instruction::Newline,
                Instruction::Character(' ', CatCode::Space),
                Instruction::Newline,
                Instruction::Character('W', CatCode::Letter),
            ],
            "H\n\n\nW"
        ),
        // Repeated paragraph breaks still produce exactly two line breaks.
        (
            par_1,
            vec![
                Instruction::Character('H', CatCode::Letter),
                Instruction::NewParagraph,
                Instruction::NewParagraph,
                Instruction::Character('W', CatCode::Letter),
            ],
            "H\n\nW"
        ),
        (
            par_2,
            vec![
                Instruction::Character('H', CatCode::Letter),
                Instruction::NewParagraph,
                Instruction::NewParagraph,
                Instruction::NewParagraph,
                Instruction::Character('W', CatCode::Letter),
            ],
            "H\n\nW"
        ),
    );

    // Pins the in-memory sizes (in bytes) of the token types and of the
    // `Result`/`Option` types that wrap a token.
    #[test]
    fn token_size() {
        assert_eq!(std::mem::size_of::<CommandRef>(), 8);
        assert_eq!(std::mem::size_of::<Value>(), 8);
        assert_eq!(std::mem::size_of::<Token>(), 12);
        assert_eq!(std::mem::size_of::<Result<Token, ()>>(), 12);
        assert_eq!(std::mem::size_of::<Result<Option<Token>, ()>>(), 12);
        assert_eq!(std::mem::size_of::<crate::prelude::Result<Token>>(), 12);
        assert_eq!(
            std::mem::size_of::<crate::prelude::Result<Option<Token>>>(),
            12
        );
    }
}