1pub mod lexer;
4pub mod trace;
5use crate::types::CatCode;
6use std::{fmt::Display, num};
7use texcraft_stdext::collections::interner;
8
/// Compact identifier for a control sequence name.
///
/// This is a newtype around a non-zero `u32`. Elsewhere in this file it is
/// used as the key type of [`CsNameInterner`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct CsName(num::NonZeroU32);

impl CsName {
    /// Returns the underlying non-zero integer widened to a `usize`.
    #[inline]
    pub fn to_usize(&self) -> usize {
        let raw: u32 = self.0.get();
        raw as usize
    }

    /// Builds a `CsName` from a `usize`.
    ///
    /// Returns `None` when `u` is zero or does not fit in a `u32`.
    pub fn try_from_usize(u: usize) -> Option<CsName> {
        let narrowed = u32::try_from(u).ok()?;
        let non_zero = num::NonZeroU32::new(narrowed)?;
        Some(CsName(non_zero))
    }
}
31
/// Interner whose keys are [`CsName`] values.
///
/// In this file it is used to resolve a [`CsName`] back to the text of the
/// control sequence name when tokens are rendered.
pub type CsNameInterner = interner::Interner<CsName>;
34
35impl interner::Key for CsName {
36 fn try_from_usize(index: usize) -> Option<Self> {
37 num::NonZeroU32::try_from_usize(index).map(CsName)
38 }
39
40 fn into_usize(self) -> usize {
41 self.0.into_usize()
42 }
43}
44
/// The value of a token.
///
/// Every variant except `CommandRef` carries the character of the token; the
/// variant itself corresponds to the `CatCode` of the same name (see
/// `Value::new` and `Value::char_and_cat_code`).
#[derive(Debug, Eq, PartialEq, Clone, Copy, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Value {
    BeginGroup(char),
    EndGroup(char),
    MathShift(char),
    AlignmentTab(char),
    Parameter(char),
    Superscript(char),
    Subscript(char),
    Space(char),
    Letter(char),
    Other(char),
    /// A reference to a command: either a control sequence or an active
    /// character.
    CommandRef(CommandRef),
}
61
/// A reference to a command, as stored in `Value::CommandRef`.
#[derive(Debug, Eq, PartialEq, Clone, Copy, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum CommandRef {
    /// A control sequence, identified by its interned name.
    ControlSequence(CsName),
    /// An active character.
    ActiveCharacter(char),
}
69
70impl CommandRef {
71 pub fn to_string(&self, cs_name_interner: &CsNameInterner) -> String {
72 match self {
73 CommandRef::ControlSequence(cs_name) => {
74 format!("\\{}", cs_name_interner.resolve(*cs_name).unwrap())
75 }
76 CommandRef::ActiveCharacter(c) => format!("{c}"),
77 }
78 }
79}
80
81impl Value {
82 pub fn new(c: char, cat_code: CatCode) -> Value {
83 match cat_code {
84 CatCode::BeginGroup => Value::BeginGroup(c),
85 CatCode::EndGroup => Value::EndGroup(c),
86 CatCode::MathShift => Value::MathShift(c),
87 CatCode::AlignmentTab => Value::AlignmentTab(c),
88 CatCode::Parameter => Value::Parameter(c),
89 CatCode::Superscript => Value::Superscript(c),
90 CatCode::Subscript => Value::Subscript(c),
91 CatCode::Space => Value::Space(c),
92 CatCode::Letter => Value::Letter(c),
93 CatCode::Other => Value::Other(c),
94 CatCode::Active => Value::CommandRef(CommandRef::ActiveCharacter(c)),
95 _ => panic!("raw cat code not allowed"),
96 }
97 }
98
99 pub fn char(&self) -> Option<char> {
101 Some(self.char_and_cat_code()?.0)
102 }
103
104 pub fn cat_code(&self) -> Option<CatCode> {
105 Some(self.char_and_cat_code()?.1)
106 }
107
108 pub fn char_and_cat_code(&self) -> Option<(char, CatCode)> {
109 let (c, code) = match self {
110 Value::BeginGroup(c) => (c, CatCode::BeginGroup),
111 Value::EndGroup(c) => (c, CatCode::EndGroup),
112 Value::MathShift(c) => (c, CatCode::MathShift),
113 Value::AlignmentTab(c) => (c, CatCode::AlignmentTab),
114 Value::Parameter(c) => (c, CatCode::Parameter),
115 Value::Superscript(c) => (c, CatCode::Superscript),
116 Value::Subscript(c) => (c, CatCode::Subscript),
117 Value::Space(c) => (c, CatCode::Space),
118 Value::Letter(c) => (c, CatCode::Letter),
119 Value::Other(c) => (c, CatCode::Other),
120 Value::CommandRef(command_ref) => match command_ref {
121 CommandRef::ControlSequence(_) => return None,
122 CommandRef::ActiveCharacter(c) => (c, CatCode::Active),
123 },
124 };
125 Some((*c, code))
126 }
127}
128
/// A token: a [`Value`] paired with a key from the `trace` module.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Token {
    /// What the token is: a character with a category, or a command reference.
    value: Value,
    /// Key from the `trace` module associated with this token.
    // NOTE(review): presumably identifies where the token came from — confirm
    // against the `trace` module.
    trace_key: trace::Key,
}
136
137impl std::fmt::Display for Token {
138 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
139 match &self.value {
140 Value::CommandRef(_) => {
141 write![f, "todo"] }
143 _ => {
144 write![f, "{}", self.char().unwrap()]
145 }
146 }
147 }
148}
149
// Generates a public `Token` constructor named `$fn_name` that takes a
// character and a trace key, and wraps the character using `$variant` (a
// `Value` variant constructor such as `Value::Letter`).
macro_rules! token_constructor {
    ($fn_name: ident, $variant: expr) => {
        pub fn $fn_name(c: char, trace_key: trace::Key) -> Token {
            let value = $variant(c);
            Token { value, trace_key }
        }
    };
}
160
161impl Token {
162 token_constructor!(new_begin_group, Value::BeginGroup);
163 token_constructor!(new_end_group, Value::EndGroup);
164 token_constructor!(new_math_shift, Value::MathShift);
165 token_constructor!(new_alignment_tab, Value::AlignmentTab);
166 token_constructor!(new_parameter, Value::Parameter);
167 token_constructor!(new_superscript, Value::Superscript);
168 token_constructor!(new_subscript, Value::Subscript);
169 token_constructor!(new_space, Value::Space);
170 token_constructor!(new_letter, Value::Letter);
171 token_constructor!(new_other, Value::Other);
172
173 pub fn new_command_ref(command_ref: CommandRef, trace_key: trace::Key) -> Token {
174 Token {
175 value: Value::CommandRef(command_ref),
176 trace_key,
177 }
178 }
179
180 pub fn new_active_character(c: char, trace_key: trace::Key) -> Token {
181 Token {
182 value: Value::CommandRef(CommandRef::ActiveCharacter(c)),
183 trace_key,
184 }
185 }
186
187 pub fn new_control_sequence(name: CsName, trace_key: trace::Key) -> Token {
188 Token {
189 value: Value::CommandRef(CommandRef::ControlSequence(name)),
190 trace_key,
191 }
192 }
193
194 pub fn new_from_value(value: Value, trace_key: trace::Key) -> Token {
195 Token { value, trace_key }
196 }
197
198 #[inline]
199 pub fn value(&self) -> Value {
200 self.value
201 }
202
203 #[inline]
204 pub fn trace_key(&self) -> trace::Key {
205 self.trace_key
206 }
207
208 pub fn char(&self) -> Option<char> {
209 self.value.char()
210 }
211
212 pub fn cat_code(&self) -> Option<CatCode> {
213 self.value.cat_code()
214 }
215 pub fn char_and_cat_code(&self) -> Option<(char, CatCode)> {
216 self.value.char_and_cat_code()
217 }
218}
219
/// Whitespace that has been requested but not yet written out.
///
/// Formatting a value with `Display` produces the whitespace to emit before
/// the next piece of visible output.
#[derive(Default)]
enum PendingWhitespace {
    /// Initial state. Every whitespace request is ignored until `reset` is
    /// called.
    #[default]
    NotStarted,
    /// No whitespace is pending.
    None,
    /// A single space is pending.
    Space,
    /// The given number of newlines is pending.
    Newlines(usize),
}

impl PendingWhitespace {
    /// Clears any pending whitespace and leaves the initial state.
    fn reset(&mut self) {
        *self = Self::None;
    }

    /// Requests a space. This only has an effect when nothing is pending:
    /// an already pending space or pending newlines are left as they are.
    fn add_space(&mut self) {
        if matches!(*self, Self::None) {
            *self = Self::Space;
        }
    }

    /// Requests one more newline. A pending space is replaced by a single
    /// newline.
    fn add_newline(&mut self) {
        let count = match *self {
            Self::NotStarted => return,
            Self::None | Self::Space => 1,
            Self::Newlines(existing) => existing + 1,
        };
        *self = Self::Newlines(count);
    }

    /// Requests a paragraph break: raises the pending whitespace to two
    /// newlines unless more than one newline is already pending.
    fn new_paragraph(&mut self) {
        if matches!(*self, Self::None | Self::Space | Self::Newlines(1)) {
            *self = Self::Newlines(2);
        }
    }
}

impl Display for PendingWhitespace {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match *self {
            Self::NotStarted | Self::None => Ok(()),
            Self::Space => f.write_str(" "),
            Self::Newlines(count) => (0..count).try_for_each(|_| f.write_str("\n")),
        }
    }
}
279
/// Writes token values to an I/O writer while collapsing whitespace.
///
/// Whitespace is not written immediately; it is recorded as pending and
/// written just before the next non-space value.
#[derive(Default)]
pub struct Writer {
    /// Whitespace to emit before the next non-space value.
    pending_whitespace: PendingWhitespace,
}
285
286impl Writer {
287 pub fn write(
289 &mut self,
290 io_writer: &mut dyn std::io::Write,
291 interner: &CsNameInterner,
292 value: Value,
293 ) -> Result<(), std::io::Error> {
294 match &value {
295 Value::CommandRef(CommandRef::ControlSequence(s)) => {
296 write!(
297 io_writer,
298 "{}\\{}",
299 self.pending_whitespace,
300 interner.resolve(*s).unwrap()
301 )?;
302 self.pending_whitespace.reset();
303 }
304 Value::Space(_) => self.pending_whitespace.add_space(),
305 _ => {
306 write!(
307 io_writer,
308 "{}{}",
309 self.pending_whitespace,
310 value.char().unwrap()
311 )?;
312 self.pending_whitespace.reset();
313 }
314 }
315 io_writer.flush()
316 }
317
318 pub fn add_newline(&mut self) {
319 self.pending_whitespace.add_newline();
320 }
321
322 pub fn start_paragraph(&mut self) {
323 self.pending_whitespace.new_paragraph();
324 }
325}
326
327pub fn write_tokens<'a, T>(tokens: T, interner: &CsNameInterner) -> String
329where
330 T: IntoIterator<Item = &'a Token>,
331{
332 let mut buffer: Vec<u8> = Default::default();
333 let mut writer: Writer = Default::default();
334 for token in tokens.into_iter() {
335 writer.write(&mut buffer, interner, token.value()).unwrap();
336 }
337 std::str::from_utf8(&buffer).unwrap().into()
338}
339
340pub fn write_token_values<'a, T>(values: T, interner: &CsNameInterner) -> String
342where
343 T: IntoIterator<Item = &'a Value>,
344{
345 let mut buffer: Vec<u8> = Default::default();
346 let mut writer: Writer = Default::default();
347 for value in values.into_iter() {
348 writer.write(&mut buffer, interner, *value).unwrap();
349 }
350 std::str::from_utf8(&buffer).unwrap().into()
351}
352
#[cfg(test)]
mod tests {
    use super::*;

    /// One step of a writer test script.
    enum Instruction {
        /// Write the control sequence with this name.
        ControlSequence(&'static str),
        /// Write this character with this category code.
        Character(char, CatCode),
        /// Call `Writer::add_newline`.
        Newline,
        /// Call `Writer::start_paragraph`.
        NewParagraph,
    }

    /// Shorthand for a character with the letter category code.
    fn letter(c: char) -> Instruction {
        Instruction::Character(c, CatCode::Letter)
    }

    /// Shorthand for a character with the space category code.
    fn space(c: char) -> Instruction {
        Instruction::Character(c, CatCode::Space)
    }

    /// Runs the instructions against a fresh writer and checks the output.
    fn writer_test(input: Vec<Instruction>, want: &str) {
        let mut interner = CsNameInterner::default();
        let mut writer = Writer::default();
        let mut buffer = Vec::<u8>::new();
        for instruction in input {
            let value = match instruction {
                Instruction::Newline => {
                    writer.add_newline();
                    continue;
                }
                Instruction::NewParagraph => {
                    writer.start_paragraph();
                    continue;
                }
                Instruction::ControlSequence(name) => {
                    let cs_name = interner.get_or_intern(name);
                    Token::new_control_sequence(cs_name, trace::Key::dummy()).value()
                }
                Instruction::Character(c, code) => {
                    Token::new_from_value(Value::new(c, code), trace::Key::dummy()).value()
                }
            };
            writer.write(&mut buffer, &interner, value).unwrap();
        }
        let got = std::str::from_utf8(&buffer).unwrap().to_owned();
        if got == want {
            return;
        }

        println!("Output is different:");
        println!("------[got]-------");
        println!("{}", got);
        println!("------[want]------");
        println!("{}", want);
        println!("-----------------");
        panic!("write_tokens test failed");
    }

    macro_rules! write_tokens_tests {
        ($( ($test_name: ident, $script: expr, $expected: expr), )+) => {
            $(
                #[test]
                fn $test_name() {
                    writer_test($script, $expected);
                }
            )+
        };
    }

    write_tokens_tests!(
        (blank, Vec::new(), ""),
        (
            trim_whitespace_from_start,
            vec![space('\n'), space('\n'), space('\n'), letter('H')],
            "H"
        ),
        (
            trim_whitespace_from_end,
            vec![letter('H'), space('\n'), space('\n'), space('\n')],
            "H"
        ),
        (
            trim_whitespace_from_middle_1,
            vec![letter('H'), space(' '), space(' '), letter('W')],
            "H W"
        ),
        (
            trim_whitespace_from_middle_2,
            vec![
                letter('H'),
                space('\n'),
                space(' '),
                space('\n'),
                letter('W'),
            ],
            "H W"
        ),
        (
            trim_whitespace_from_middle_3,
            vec![
                letter('H'),
                space('\n'),
                space('\n'),
                space('\n'),
                letter('W'),
            ],
            "H W"
        ),
        (
            control_sequence,
            vec![Instruction::ControlSequence("HelloWorld")],
            "\\HelloWorld"
        ),
        (
            newline_1,
            vec![letter('H'), Instruction::Newline, letter('W')],
            "H\nW"
        ),
        (
            newline_2,
            vec![
                letter('H'),
                Instruction::Newline,
                space(' '),
                Instruction::Newline,
                letter('W'),
            ],
            "H\n\nW"
        ),
        (
            newline_3,
            vec![
                letter('H'),
                Instruction::Newline,
                space(' '),
                Instruction::Newline,
                space(' '),
                Instruction::Newline,
                letter('W'),
            ],
            "H\n\n\nW"
        ),
        (
            par_1,
            vec![
                letter('H'),
                Instruction::NewParagraph,
                Instruction::NewParagraph,
                letter('W'),
            ],
            "H\n\nW"
        ),
        (
            par_2,
            vec![
                letter('H'),
                Instruction::NewParagraph,
                Instruction::NewParagraph,
                Instruction::NewParagraph,
                letter('W'),
            ],
            "H\n\nW"
        ),
    );

    /// Pins the in-memory size of the token types and of the result types
    /// that wrap them.
    #[test]
    fn token_size() {
        use std::mem::size_of;

        assert_eq!(size_of::<CommandRef>(), 8);
        assert_eq!(size_of::<Value>(), 8);
        assert_eq!(size_of::<Token>(), 12);
        assert_eq!(size_of::<Result<Token, ()>>(), 12);
        assert_eq!(size_of::<Result<Option<Token>, ()>>(), 12);
        assert_eq!(size_of::<crate::prelude::Result<Token>>(), 12);
        assert_eq!(size_of::<crate::prelude::Result<Option<Token>>>(), 12);
    }
}