1use crate::prelude as txl;
8use crate::token::{CommandRef, Value};
9use crate::traits::*;
10use crate::*;
11
12impl Parsable for i32 {
13 fn parse_impl<S: TexlangState>(input: &mut vm::ExpandedStream<S>) -> txl::Result<Self> {
14 let (_, i, _) = parse_integer(input)?;
15 Ok(i)
16 }
17}
18
19#[derive(Debug, PartialEq, Eq, Default)]
29pub struct Uint<const N: usize>(pub usize);
30
31impl Uint<0> {
32 pub const MAX: usize = i32::MAX as usize;
33}
34
35impl<const N: usize> Parsable for Uint<N> {
36 fn parse_impl<S: TexlangState>(input: &mut vm::ExpandedStream<S>) -> txl::Result<Self> {
37 let (first_token, i, _) = parse_integer(input)?;
38 if i < 0 || i as usize >= N {
39 input.error(OutOfBoundsError::<N> {
40 first_token,
41 got: i,
42 })?;
43 Ok(Uint(0))
44 } else {
45 Ok(Uint(i as usize))
46 }
47 }
48}
49
50#[derive(Debug)]
51struct OutOfBoundsError<const N: usize> {
52 first_token: token::Token,
53 got: i32,
54}
55
56impl<const N: usize> error::TexError for OutOfBoundsError<N> {
57 fn kind(&self) -> error::Kind {
58 error::Kind::Token(self.first_token)
59 }
60
61 fn title(&self) -> String {
62 format!(
63 "expected an integer in the range [0, {}), got {}",
64 N, self.got
65 )
66 }
67}
68
69impl Parsable for char {
70 fn parse_impl<S: TexlangState>(input: &mut vm::ExpandedStream<S>) -> txl::Result<Self> {
71 let u1 = Uint::<{ char::MAX as usize }>::parse(input)?;
72 let u2: u32 = u1.0.try_into().unwrap();
73 Ok(char::from_u32(u2).unwrap())
74 }
75}
76
77impl Parsable for types::CatCode {
79 fn parse_impl<S: TexlangState>(input: &mut vm::ExpandedStream<S>) -> txl::Result<Self> {
80 let (token, i, _) = parse_integer(input)?;
81 if let Ok(val_u8) = u8::try_from(i) {
82 if let Ok(cat_code) = types::CatCode::try_from(val_u8) {
83 return Ok(cat_code);
84 }
85 }
86 input.error(parse::Error {
87 expected: "a category code number (an integer in the range [0, 15])".into(),
88 got: Some(token),
89 got_override: format!["got the integer {i}"],
90 annotation_override: "this is where the number started".into(),
91 guidance: "".into(),
92 additional_notes: vec![],
93 })?;
94 Ok(types::CatCode::try_from(0).unwrap())
95 }
96}
97
/// Guidance text attached to errors raised when the input does not start a number.
const GUIDANCE_BEGINNING: &str =
    "a number begins with zero or more minus signs followed by one of the following:\n\
     - A decimal digit (0-9), which begins a decimal number.\n\
     - The character ', which indicates the beginning of an octal number\n\
     - The character \", which indicates the beginning of a hexadecimal number\n\
     - The character `, followed by a character token. The character is converted into its UTF-8 number.\n\
     - A command that references a variable, like \\year.\n";
106
107pub(crate) fn parse_integer<S: TexlangState>(
109 stream: &mut vm::ExpandedStream<S>,
110) -> txl::Result<(token::Token, i32, Option<u8>)> {
111 let sign = parse_optional_signs(stream)?;
112 let first_token = stream.next_or_err(NumberEndOfInputError {})?;
113 let (result, radix) = match first_token.value() {
114 Value::Other('0') => (parse_constant::<S, 10>(stream, 0)?, Some(10_u8)),
115 Value::Other('1') => (parse_constant::<S, 10>(stream, 1)?, Some(10_u8)),
116 Value::Other('2') => (parse_constant::<S, 10>(stream, 2)?, Some(10_u8)),
117 Value::Other('3') => (parse_constant::<S, 10>(stream, 3)?, Some(10_u8)),
118 Value::Other('4') => (parse_constant::<S, 10>(stream, 4)?, Some(10_u8)),
119 Value::Other('5') => (parse_constant::<S, 10>(stream, 5)?, Some(10_u8)),
120 Value::Other('6') => (parse_constant::<S, 10>(stream, 6)?, Some(10_u8)),
121 Value::Other('7') => (parse_constant::<S, 10>(stream, 7)?, Some(10_u8)),
122 Value::Other('8') => (parse_constant::<S, 10>(stream, 8)?, Some(10_u8)),
123 Value::Other('9') => (parse_constant::<S, 10>(stream, 9)?, Some(10_u8)),
124 Value::Other('\'') => (parse_constant::<S, 8>(stream, 0)?, Some(8_u8)),
125 Value::Other('"') => (parse_constant::<S, 16>(stream, 0)?, Some(16_u8)),
126 Value::Other('`') => (parse_character(stream)?, None),
127 Value::CommandRef(command_ref) => (
128 parse_internal_number(stream, first_token, command_ref)?.integer(),
129 None,
130 ),
131 _ => {
133 stream.back(first_token);
134 stream.error(parse::Error::new(
135 "the beginning of a number",
136 Some(first_token),
137 GUIDANCE_BEGINNING,
138 ))?;
139 (0, None)
140 }
141 };
142 let result = match sign {
143 None => result,
144 Some(_) => result.wrapping_mul(-1),
148 };
149 Ok((first_token, result, radix))
150}
151
152#[derive(Debug)]
153pub(crate) enum InternalNumber {
154 Integer(i32),
155 Dimen(core::Scaled),
156 Glue(core::Glue),
157}
158
159impl InternalNumber {
160 pub(crate) fn integer(&self) -> i32 {
161 use InternalNumber::*;
162 match self {
163 Integer(i) => *i,
164 Dimen(scaled) => scaled.0,
165 Glue(glue) => glue.width.0,
166 }
167 }
168}
169
170pub(crate) fn parse_internal_number<S: TexlangState>(
183 input: &mut vm::ExpandedStream<S>,
184 first_token: token::Token,
185 command_ref: CommandRef,
186) -> txl::Result<InternalNumber> {
187 let cmd = input.commands_map().get_command(&command_ref);
188 match cmd {
189 Some(command::Command::Variable(cmd)) => {
190 match cmd.clone().value(first_token, input)? {
191 variable::ValueRef::Int(i) => Ok(InternalNumber::Integer(*i)),
192 variable::ValueRef::CatCode(c) => Ok(InternalNumber::Integer(*c as i32)),
193 variable::ValueRef::MathCode(c) => Ok(InternalNumber::Integer(c.0 as i32)),
194 variable::ValueRef::Dimen(d) => Ok(InternalNumber::Dimen(*d)),
195 variable::ValueRef::Glue(g) => Ok(InternalNumber::Glue(*g)),
196 variable::ValueRef::Font(_) => {
197 todo!("scan a font into an int?");
199 }
200 variable::ValueRef::TokenList(_) => Err(input.fatal_error(
201 parse::Error::new(
202 "the beginning of a number",
203 Some(first_token),
204 GUIDANCE_BEGINNING,
205 )
206 .with_annotation_override("token list variable"),
207 )),
208 }
209 }
210 Some(command::Command::Character(c)) => Ok(InternalNumber::Integer(*c as i32)),
211 Some(command::Command::MathCharacter(c)) => Ok(InternalNumber::Integer(c.0 as i32)),
212 None
213 | Some(
214 command::Command::Execution(..)
215 | command::Command::Expansion(..)
216 | command::Command::Macro(..)
217 | command::Command::CharacterTokenAlias(..)
218 | command::Command::Font(..),
219 ) => {
220 let err = parse::Error::new(
221 "the beginning of a number",
222 Some(first_token),
223 GUIDANCE_BEGINNING,
224 )
225 .with_annotation_override(match cmd {
226 None => "undefined control sequence".to_string(),
227 Some(cmd) => format!["control sequence referencing {cmd}"],
228 });
229 input.expansions_mut().push(first_token);
230 Err(input.fatal_error(err))
231 }
232 }
233}
234
235#[derive(Debug)]
236struct NumberEndOfInputError;
237
238impl error::EndOfInputError for NumberEndOfInputError {
239 fn doing(&self) -> String {
240 "parsing a number".into()
241 }
242 fn notes(&self) -> Vec<error::display::Note> {
243 vec![GUIDANCE_BEGINNING.into()]
244 }
245}
246
247pub fn parse_optional_signs<S: TexlangState>(
254 stream: &mut vm::ExpandedStream<S>,
255) -> txl::Result<Option<token::Token>> {
256 let mut result = None;
257 while let Some((sign, token)) = get_optional_element_with_token![
258 stream,
259 Value::Other('+') => true,
260 Value::Other('-') => false,
261 Value::Space(_) => true,
262 ] {
263 result = match (result, sign) {
264 (None, false) => Some(token),
265 (Some(_), false) => None,
266 (result, true) => result,
267 };
268 }
269 Ok(result)
270}
271
272fn parse_character<S: TexlangState>(input: &mut vm::ExpandedStream<S>) -> txl::Result<i32> {
274 let c = {
276 let token = input.next_or_err(CharacterError {})?;
277 match token.value() {
278 Value::CommandRef(token::CommandRef::ControlSequence(cs_name)) => {
279 let name = input.vm().cs_name_interner().resolve(cs_name).unwrap();
280 let mut iter = name.chars();
281 match (iter.next(), iter.count()) {
282 (Some(c), 0) => c,
286 _ => {
287 input.error(parse::Error::new(
288 "a character",
289 Some(token),
290 "a character is a character token or single-character control sequence like \\a",
291 ))?;
292 '0'
293 }
294 }
295 }
296 _ => token.char().unwrap(),
297 }
298 };
299 super::OptionalSpace::parse(input)?;
300 Ok(c as i32)
301}
302
303#[derive(Debug)]
304struct CharacterError;
305
306impl error::EndOfInputError for CharacterError {
307 fn doing(&self) -> String {
308 "parsing a character".into()
309 }
310
311 fn notes(&self) -> Vec<error::display::Note> {
312 vec![
313 r"a character is a character token or single-character control sequence like \a".into(),
314 ]
315 }
316}
317
318fn parse_constant<S: TexlangState, const RADIX: i32>(
321 stream: &mut vm::ExpandedStream<S>,
322 mut result: i32,
323) -> txl::Result<i32> {
324 let mut started = RADIX == 10;
325 let mut too_big = false;
326 loop {
327 let next = match stream.next()? {
328 None => break,
329 Some(next) => next,
330 };
331 let lsd_or = match next.value() {
332 token::Value::Other(c) => {
333 let d = (c as u32).wrapping_sub('0' as u32);
334 if d < 10 && d < (RADIX as u32) {
335 Some(d as i32)
336 } else if RADIX == 16 {
337 let d = (c as u32).wrapping_sub('A' as u32);
338 if d < 6 {
339 Some(d as i32 + 10)
340 } else {
341 None
342 }
343 } else {
344 None
345 }
346 }
347 token::Value::Letter(c) => {
348 let d = (c as u32).wrapping_sub('A' as u32);
349 if RADIX == 16 && d < 6 {
350 Some(d as i32 + 10)
351 } else {
352 None
353 }
354 }
355 _ => None,
356 };
357 let lsd = match lsd_or {
358 None => {
359 stream.back(next);
360 break;
361 }
362 Some(lsd) => lsd,
363 };
364 started = true;
365 result = match add_lsd::<RADIX>(result, lsd) {
366 Some(n) => n,
367 None => {
368 if !too_big {
369 stream.error(add_lsd_error::<RADIX>(next, result, lsd))?;
370 too_big = true;
371 }
372 i32::MAX
373 }
374 }
375 }
376 if !started {
377 let (expected, guidance) = match RADIX {
378 8 => {
379 ("an octal digit",
380 "an octal digit is a token with value 0-7 and category other")
381 },
382 16 => {
383 ("a hexadecimal digit",
384 "a hexadecimal digit is either:\n- A character token with value 0-9 and category other, or\n- A character token with value A-F and category letter or other")
385 }
386 _ => unreachable!(),
387 };
388 let got = stream.peek()?;
389 stream.error(parse::Error::new(expected, got, guidance))?;
390 }
391 super::OptionalSpace::parse(stream)?;
392 Ok(result)
393}
394
/// Appends the least significant digit `lsd` to `n` in base `RADIX`, i.e.
/// computes `n * RADIX + lsd`. Returns `None` if either step overflows `i32`.
fn add_lsd<const RADIX: i32>(n: i32, lsd: i32) -> Option<i32> {
    n.checked_mul(RADIX)
        .and_then(|shifted| shifted.checked_add(lsd))
}
401
402fn add_lsd_error<const RADIX: i32>(token: token::Token, n: i32, lsd: i32) -> parse::Error {
403 let (got, range) = match RADIX {
404 8 => (
405 format!["got '{n:o}{lsd:o}"],
406 format!["'{:o}, '{:o}", i32::MIN, i32::MAX],
407 ),
408 10 => (
409 format!["got {n}{lsd}"],
410 format!["{}, {}", i32::MIN, i32::MAX],
411 ),
412 16 => (
413 format!["got 0x{n:X}{lsd:X}"],
414 format!["0x{:X}, 0x{:X}", i32::MIN, i32::MAX],
415 ),
416 _ => panic!("radix must be 8, 10 or 16"),
417 };
418 parse::Error {
419 expected: format!["a number in the range [{range}]"],
420 got: Some(token),
421 got_override: got,
422 annotation_override: "this digit makes the number too big".into(),
423 guidance: "".into(),
424 additional_notes: vec![],
425 }
426}
427
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parse::testing::*;

    // Each entry is (test name, input, expected parsed value).
    parse_success_tests![
        (octal_0, "'0", 0),
        (octal_1, "'1", 1),
        (octal_2, "'2", 2),
        (octal_3, "'3", 3),
        (octal_4, "'4", 4),
        (octal_5, "'5", 5),
        (octal_6, "'6", 6),
        (octal_7, "'7", 7),
        (octal_8, "'10", 8),
        (octal_9, "'11", 9),
        (octal_10, "'12", 10),
        (octal_11, "'13", 11),
        (octal_12, "'14", 12),
        (octal_13, "'15", 13),
        (octal_14, "'16", 14),
        (octal_15, "'17", 15),
        (octal_129, "'201", 129),
        (octal_max, "'17777777777", 2147483647),
        (octal_min, "-'17777777777", -2147483647),
        (decimal_0, "0", 0),
        (decimal_1, "1", 1),
        (decimal_2, "2", 2),
        (decimal_3, "3", 3),
        (decimal_4, "4", 4),
        (decimal_5, "5", 5),
        (decimal_6, "6", 6),
        (decimal_7, "7", 7),
        (decimal_8, "8", 8),
        (decimal_9, "9", 9),
        (decimal_10, "10", 10),
        (decimal_11, "11", 11),
        (decimal_12, "12", 12),
        (decimal_13, "13", 13),
        (decimal_14, "14", 14),
        (decimal_15, "15", 15),
        (decimal_16, "16", 16),
        (decimal_17, "17", 17),
        (decimal_18, "18", 18),
        (decimal_19, "19", 19),
        (decimal_19_with_0_padding, "00019", 19),
        (decimal_201, "201", 201),
        (decimal_max, "2147483647", 2147483647),
        (decimal_min, "-2147483647", -2147483647),
        (hexadecimal_0, "\"0", 0),
        (hexadecimal_1, "\"1", 1),
        (hexadecimal_2, "\"2", 2),
        (hexadecimal_3, "\"3", 3),
        (hexadecimal_4, "\"4", 4),
        (hexadecimal_5, "\"5", 5),
        (hexadecimal_6, "\"6", 6),
        (hexadecimal_7, "\"7", 7),
        (hexadecimal_8, "\"8", 8),
        (hexadecimal_9, "\"9", 9),
        (hexadecimal_10, "\"A", 10),
        (hexadecimal_11, "\"B", 11),
        (hexadecimal_12, "\"C", 12),
        (hexadecimal_13, "\"D", 13),
        (hexadecimal_14, "\"E", 14),
        (hexadecimal_15, "\"F", 15),
        (hexadecimal_16, "\"10", 16),
        (hexadecimal_17, "\"11", 17),
        (hexadecimal_18, "\"12", 18),
        (hexadecimal_19, "\"13", 19),
        (hexadecimal_20, "\"14", 20),
        (hexadecimal_21, "\"15", 21),
        (hexadecimal_22, "\"16", 22),
        (hexadecimal_23, "\"17", 23),
        (hexadecimal_24, "\"18", 24),
        (hexadecimal_25, "\"19", 25),
        (hexadecimal_26, "\"1A", 26),
        (hexadecimal_27, "\"1B", 27),
        (hexadecimal_28, "\"1C", 28),
        (hexadecimal_29, "\"1D", 29),
        (hexadecimal_30, "\"1E", 30),
        (hexadecimal_31, "\"1F", 31),
        (hexadecimal_513, "\"201", 513),
        (hexadecimal_max, "\"7FFFFFFF", 2147483647),
        (hexadecimal_min, "-\"7FFFFFFF", -2147483647),
        (number_from_character, "`A", 65),
        (number_from_length_1_control_sequence, r"`\A", 65),
        (number_from_character_non_ascii, "`ö", 0x00F6),
        (
            number_from_length_1_control_sequence_non_ascii,
            r"`\ö",
            0x00F6
        ),
        (signs_plus, r"+4", 4),
        (signs_minus, r"-4", -4),
        (signs_plus_minus, r"+-4", -4),
        (signs_minus_minus, r"--4", 4),
        (signs_minus_minus_spaces, r" - - 4", 4),
    ];

    #[derive(Default)]
    struct State;

    impl TexlangState for State {
        // Gives '9' the letter category so that the input "9" is not a decimal
        // digit (see `number_with_letter_catcode`); every other character uses
        // the plain TeX defaults.
        fn cat_code(&self, c: char) -> types::CatCode {
            if c == '9' {
                return types::CatCode::Letter;
            }
            types::CatCode::PLAIN_TEX_DEFAULTS
                .get(c as usize)
                .copied()
                .unwrap_or_default()
        }
    }

    parse_failure_tests![
        i32,
        State,
        (number_with_letter_catcode, "9"),
        (octal_too_big, "'177777777770", i32::MAX),
        (octal_empty, "'"),
        (decimal_too_big_1, "2147483648", i32::MAX),
        (decimal_too_big_2, "500000000000000", i32::MAX),
        (decimal_too_negative_1, "-2147483648", -i32::MAX),
        (decimal_too_negative_2, "-5000000000000", -i32::MAX),
        (hexadecimal_too_big, "\"7FFFFFFF0", i32::MAX),
        (hexadecimal_empty, "\""),
        (character, "A"),
        (control_sequence_too_big, r"`\BC", '0' as i32),
    ];

    parse_failure_tests![
        Uint::<16>,
        State,
        (number_too_big, "16"),
        (number_is_negative, "-1"),
    ];
}