1use crate::prelude as txl;
8use crate::token::{CommandRef, Value};
9use crate::traits::*;
10use crate::*;
11
12impl Parsable for i32 {
13 fn parse_impl<S: TexlangState>(input: &mut vm::ExpandedStream<S>) -> txl::Result<Self> {
14 let (_, i, _) = parse_integer(input)?;
15 Ok(i)
16 }
17}
18
19impl Parsable for u8 {
20 fn parse_impl<S: TexlangState>(input: &mut vm::ExpandedStream<S>) -> txl::Result<Self> {
21 let u = Uint::<256>::parse_impl(input)?;
22 Ok(u.0.try_into().expect("smaller than 256 so in range"))
23 }
24}
25#[derive(Debug, PartialEq, Eq, Default)]
35pub struct Uint<const N: usize>(pub usize);
36
37impl Uint<0> {
38 pub const MAX: usize = i32::MAX as usize;
39}
40
41impl<const N: usize> Parsable for Uint<N> {
42 fn parse_impl<S: TexlangState>(input: &mut vm::ExpandedStream<S>) -> txl::Result<Self> {
43 let (first_token, i, _) = parse_integer(input)?;
44 if i < 0 || i as usize >= N {
45 input.error(OutOfBoundsError::<N> {
46 first_token,
47 got: i,
48 })?;
49 Ok(Uint(0))
50 } else {
51 Ok(Uint(i as usize))
52 }
53 }
54}
55
56#[derive(Debug)]
57struct OutOfBoundsError<const N: usize> {
58 first_token: token::Token,
59 got: i32,
60}
61
62impl<const N: usize> error::TexError for OutOfBoundsError<N> {
63 fn kind(&self) -> error::Kind {
64 error::Kind::Token(self.first_token)
65 }
66
67 fn title(&self) -> String {
68 format!(
69 "expected an integer in the range [0, {}), got {}",
70 N, self.got
71 )
72 }
73}
74
75impl Parsable for char {
76 fn parse_impl<S: TexlangState>(input: &mut vm::ExpandedStream<S>) -> txl::Result<Self> {
77 let u1 = Uint::<{ char::MAX as usize }>::parse(input)?;
78 let u2: u32 = u1.0.try_into().unwrap();
79 Ok(char::from_u32(u2).unwrap())
80 }
81}
82
83impl Parsable for types::CatCode {
85 fn parse_impl<S: TexlangState>(input: &mut vm::ExpandedStream<S>) -> txl::Result<Self> {
86 let (token, i, _) = parse_integer(input)?;
87 if let Ok(val_u8) = u8::try_from(i) {
88 if let Ok(cat_code) = types::CatCode::try_from(val_u8) {
89 return Ok(cat_code);
90 }
91 }
92 input.error(parse::Error {
93 expected: "a category code number (an integer in the range [0, 15])".into(),
94 got: Some(token),
95 got_override: format!["got the integer {i}"],
96 annotation_override: "this is where the number started".into(),
97 guidance: "".into(),
98 additional_notes: vec![],
99 })?;
100 Ok(types::CatCode::try_from(0).unwrap())
101 }
102}
103
/// Guidance text, shared by the number-parsing errors in this file, that lists the tokens
/// with which a number may begin.
const GUIDANCE_BEGINNING: &str = concat!(
    "a number begins with zero or more minus signs followed by one of the following:\n",
    "- A decimal digit (0-9), which begins a decimal number.\n",
    "- The character ', which indicates the beginning of an octal number\n",
    "- The character \", which indicates the beginning of a hexadecimal number\n",
    "- The character `, followed by a character token. The character is converted into its UTF-8 number.\n",
    "- A command that references a variable, like \\year.\n",
);
112
113pub(crate) fn parse_integer<S: TexlangState>(
115 stream: &mut vm::ExpandedStream<S>,
116) -> txl::Result<(token::Token, i32, Option<u8>)> {
117 let sign = parse_optional_signs(stream)?;
118 let first_token = stream.next_or_err(NumberEndOfInputError {})?;
119 let (result, radix) = match first_token.value() {
120 Value::Other('0') => (parse_constant::<S, 10>(stream, 0)?, Some(10_u8)),
121 Value::Other('1') => (parse_constant::<S, 10>(stream, 1)?, Some(10_u8)),
122 Value::Other('2') => (parse_constant::<S, 10>(stream, 2)?, Some(10_u8)),
123 Value::Other('3') => (parse_constant::<S, 10>(stream, 3)?, Some(10_u8)),
124 Value::Other('4') => (parse_constant::<S, 10>(stream, 4)?, Some(10_u8)),
125 Value::Other('5') => (parse_constant::<S, 10>(stream, 5)?, Some(10_u8)),
126 Value::Other('6') => (parse_constant::<S, 10>(stream, 6)?, Some(10_u8)),
127 Value::Other('7') => (parse_constant::<S, 10>(stream, 7)?, Some(10_u8)),
128 Value::Other('8') => (parse_constant::<S, 10>(stream, 8)?, Some(10_u8)),
129 Value::Other('9') => (parse_constant::<S, 10>(stream, 9)?, Some(10_u8)),
130 Value::Other('\'') => (parse_constant::<S, 8>(stream, 0)?, Some(8_u8)),
131 Value::Other('"') => (parse_constant::<S, 16>(stream, 0)?, Some(16_u8)),
132 Value::Other('`') => (parse_character(stream)?, None),
133 Value::CommandRef(command_ref) => (
134 parse_internal_number(stream, first_token, command_ref)?.integer(),
135 None,
136 ),
137 _ => {
139 stream.back(first_token);
140 stream.error(parse::Error::new(
141 "the beginning of a number",
142 Some(first_token),
143 GUIDANCE_BEGINNING,
144 ))?;
145 (0, None)
146 }
147 };
148 let result = match sign {
149 None => result,
150 Some(_) => result.wrapping_mul(-1),
154 };
155 Ok((first_token, result, radix))
156}
157
158#[derive(Debug)]
159pub(crate) enum InternalNumber {
160 Integer(i32),
161 Dimen(common::Scaled),
162 Glue(common::Glue),
163}
164
165impl InternalNumber {
166 pub(crate) fn integer(&self) -> i32 {
167 use InternalNumber::*;
168 match self {
169 Integer(i) => *i,
170 Dimen(scaled) => scaled.0,
171 Glue(glue) => glue.width.0,
172 }
173 }
174}
175
176pub(crate) fn parse_internal_number<S: TexlangState>(
189 input: &mut vm::ExpandedStream<S>,
190 first_token: token::Token,
191 command_ref: CommandRef,
192) -> txl::Result<InternalNumber> {
193 let cmd = input.commands_map().get_command(&command_ref);
194 match cmd {
195 Some(command::Command::Variable(cmd)) => {
196 match cmd.clone().value(first_token, input)? {
197 variable::ValueRef::Int(i) => Ok(InternalNumber::Integer(*i)),
198 variable::ValueRef::SmallInt(c) => Ok(InternalNumber::Integer(*c as i32)),
199 variable::ValueRef::CatCode(c) => Ok(InternalNumber::Integer(*c as i32)),
200 variable::ValueRef::MathCode(c) => Ok(InternalNumber::Integer(c.0 as i32)),
201 variable::ValueRef::Dimen(d) => Ok(InternalNumber::Dimen(*d)),
202 variable::ValueRef::Glue(g) => Ok(InternalNumber::Glue(*g)),
203 variable::ValueRef::Font(_) => {
204 todo!("scan a font into an int?");
206 }
207 variable::ValueRef::TokenList(_) => Err(input.fatal_error(
208 parse::Error::new(
209 "the beginning of a number",
210 Some(first_token),
211 GUIDANCE_BEGINNING,
212 )
213 .with_annotation_override("token list variable"),
214 )),
215 }
216 }
217 Some(command::Command::Character(c)) => Ok(InternalNumber::Integer(*c as i32)),
218 Some(command::Command::MathCharacter(c)) => Ok(InternalNumber::Integer(c.0 as i32)),
219 None
220 | Some(
221 command::Command::Execution(..)
222 | command::Command::Expansion(..)
223 | command::Command::Macro(..)
224 | command::Command::CharacterTokenAlias(..)
225 | command::Command::Font(..),
226 ) => {
227 let err = parse::Error::new(
228 "the beginning of a number",
229 Some(first_token),
230 GUIDANCE_BEGINNING,
231 )
232 .with_annotation_override(match cmd {
233 None => "undefined control sequence".to_string(),
234 Some(cmd) => format!["control sequence referencing {cmd}"],
235 });
236 input.expansions_mut().push(first_token);
237 Err(input.fatal_error(err))
238 }
239 }
240}
241
242#[derive(Debug)]
243struct NumberEndOfInputError;
244
245impl error::EndOfInputError for NumberEndOfInputError {
246 fn doing(&self) -> String {
247 "parsing a number".into()
248 }
249 fn notes(&self) -> Vec<error::display::Note> {
250 vec![GUIDANCE_BEGINNING.into()]
251 }
252}
253
254pub fn parse_optional_signs<S: TexlangState>(
261 stream: &mut vm::ExpandedStream<S>,
262) -> txl::Result<Option<token::Token>> {
263 let mut result = None;
264 while let Some((sign, token)) = get_optional_element_with_token![
265 stream,
266 Value::Other('+') => true,
267 Value::Other('-') => false,
268 Value::Space(_) => true,
269 ] {
270 result = match (result, sign) {
271 (None, false) => Some(token),
272 (Some(_), false) => None,
273 (result, true) => result,
274 };
275 }
276 Ok(result)
277}
278
279fn parse_character<S: TexlangState>(input: &mut vm::ExpandedStream<S>) -> txl::Result<i32> {
281 let c = {
283 let token = input.next_or_err(CharacterError {})?;
284 match token.value() {
285 Value::CommandRef(token::CommandRef::ControlSequence(cs_name)) => {
286 let name = input.vm().cs_name_interner().resolve(cs_name).unwrap();
287 let mut iter = name.chars();
288 match (iter.next(), iter.count()) {
289 (Some(c), 0) => c,
293 _ => {
294 input.error(parse::Error::new(
295 "a character",
296 Some(token),
297 "a character is a character token or single-character control sequence like \\a",
298 ))?;
299 '0'
300 }
301 }
302 }
303 _ => token.char().unwrap(),
304 }
305 };
306 super::OptionalSpace::parse(input)?;
307 Ok(c as i32)
308}
309
310#[derive(Debug)]
311struct CharacterError;
312
313impl error::EndOfInputError for CharacterError {
314 fn doing(&self) -> String {
315 "parsing a character".into()
316 }
317
318 fn notes(&self) -> Vec<error::display::Note> {
319 vec![
320 r"a character is a character token or single-character control sequence like \a".into(),
321 ]
322 }
323}
324
325fn parse_constant<S: TexlangState, const RADIX: i32>(
328 stream: &mut vm::ExpandedStream<S>,
329 mut result: i32,
330) -> txl::Result<i32> {
331 let mut started = RADIX == 10;
332 let mut too_big = false;
333 loop {
334 let next = match stream.next()? {
335 None => break,
336 Some(next) => next,
337 };
338 let lsd_or = match next.value() {
339 token::Value::Other(c) => {
340 let d = (c as u32).wrapping_sub('0' as u32);
341 if d < 10 && d < (RADIX as u32) {
342 Some(d as i32)
343 } else if RADIX == 16 {
344 let d = (c as u32).wrapping_sub('A' as u32);
345 if d < 6 {
346 Some(d as i32 + 10)
347 } else {
348 None
349 }
350 } else {
351 None
352 }
353 }
354 token::Value::Letter(c) => {
355 let d = (c as u32).wrapping_sub('A' as u32);
356 if RADIX == 16 && d < 6 {
357 Some(d as i32 + 10)
358 } else {
359 None
360 }
361 }
362 _ => None,
363 };
364 let lsd = match lsd_or {
365 None => {
366 stream.back(next);
367 break;
368 }
369 Some(lsd) => lsd,
370 };
371 started = true;
372 result = match add_lsd::<RADIX>(result, lsd) {
373 Some(n) => n,
374 None => {
375 if !too_big {
376 stream.error(add_lsd_error::<RADIX>(next, result, lsd))?;
377 too_big = true;
378 }
379 i32::MAX
380 }
381 }
382 }
383 if !started {
384 let (expected, guidance) = match RADIX {
385 8 => {
386 ("an octal digit",
387 "an octal digit is a token with value 0-7 and category other")
388 },
389 16 => {
390 ("a hexadecimal digit",
391 "a hexadecimal digit is either:\n- A character token with value 0-9 and category other, or\n- A character token with value A-F and category letter or other")
392 }
393 _ => unreachable!(),
394 };
395 let got = stream.peek()?;
396 stream.error(parse::Error::new(expected, got, guidance))?;
397 }
398 super::OptionalSpace::parse(stream)?;
399 Ok(result)
400}
401
/// Appends the least significant digit `lsd` to `n` in base `RADIX`, i.e. computes
/// `n * RADIX + lsd`.
///
/// Returns `None` if either the multiplication or the addition overflows an `i32`.
fn add_lsd<const RADIX: i32>(n: i32, lsd: i32) -> Option<i32> {
    n.checked_mul(RADIX)
        .and_then(|shifted| shifted.checked_add(lsd))
}
408
409fn add_lsd_error<const RADIX: i32>(token: token::Token, n: i32, lsd: i32) -> parse::Error {
410 let (got, range) = match RADIX {
411 8 => (
412 format!["got '{n:o}{lsd:o}"],
413 format!["'{:o}, '{:o}", i32::MIN, i32::MAX],
414 ),
415 10 => (
416 format!["got {n}{lsd}"],
417 format!["{}, {}", i32::MIN, i32::MAX],
418 ),
419 16 => (
420 format!["got 0x{n:X}{lsd:X}"],
421 format!["0x{:X}, 0x{:X}", i32::MIN, i32::MAX],
422 ),
423 _ => panic!("radix must be 8, 10 or 16"),
424 };
425 parse::Error {
426 expected: format!["a number in the range [{range}]"],
427 got: Some(token),
428 got_override: got,
429 annotation_override: "this digit makes the number too big".into(),
430 guidance: "".into(),
431 additional_notes: vec![],
432 }
433}
434
// Unit tests for integer parsing. The test tables below are expanded by the
// `parse_success_tests!` and `parse_failure_tests!` macros, which are not defined in this file.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parse::testing::*;

    // Each entry is (test name, input, expected value).
    parse_success_tests![
        // Octal constants, introduced by '.
        (octal_0, "'0", 0),
        (octal_1, "'1", 1),
        (octal_2, "'2", 2),
        (octal_3, "'3", 3),
        (octal_4, "'4", 4),
        (octal_5, "'5", 5),
        (octal_6, "'6", 6),
        (octal_7, "'7", 7),
        (octal_8, "'10", 8),
        (octal_9, "'11", 9),
        // NOTE(review): this case checks the value 10, so the name looks like a typo for
        // `octal_10` -- confirm before renaming.
        (octal_19, "'12", 10),
        (octal_11, "'13", 11),
        (octal_12, "'14", 12),
        (octal_13, "'15", 13),
        (octal_14, "'16", 14),
        (octal_15, "'17", 15),
        (octal_129, "'201", 129),
        (octal_max, "'17777777777", 2147483647),
        (octal_min, "-'17777777777", -2147483647),
        // Decimal constants.
        (decimal_0, "0", 0),
        (decimal_1, "1", 1),
        (decimal_2, "2", 2),
        (decimal_3, "3", 3),
        (decimal_4, "4", 4),
        (decimal_5, "5", 5),
        (decimal_6, "6", 6),
        (decimal_7, "7", 7),
        (decimal_8, "8", 8),
        (decimal_9, "9", 9),
        (decimal_10, "10", 10),
        (decimal_11, "11", 11),
        (decimal_12, "12", 12),
        (decimal_13, "13", 13),
        (decimal_14, "14", 14),
        (decimal_15, "15", 15),
        (decimal_16, "16", 16),
        (decimal_17, "17", 17),
        (decimal_18, "18", 18),
        (decimal_19, "19", 19),
        (decimal_1_with_0_padding, "00019", 19),
        (decimal_201, "201", 201),
        (decimal_max, "2147483647", 2147483647),
        (decimal_min, "-2147483647", -2147483647),
        // Hexadecimal constants, introduced by ".
        (hexadecimal_0, "\"0", 0),
        (hexadecimal_1, "\"1", 1),
        (hexadecimal_2, "\"2", 2),
        (hexadecimal_3, "\"3", 3),
        (hexadecimal_4, "\"4", 4),
        (hexadecimal_5, "\"5", 5),
        (hexadecimal_6, "\"6", 6),
        (hexadecimal_7, "\"7", 7),
        (hexadecimal_8, "\"8", 8),
        (hexadecimal_9, "\"9", 9),
        (hexadecimal_10, "\"A", 10),
        (hexadecimal_11, "\"B", 11),
        (hexadecimal_12, "\"C", 12),
        (hexadecimal_13, "\"D", 13),
        (hexadecimal_14, "\"E", 14),
        (hexadecimal_15, "\"F", 15),
        (hexadecimal_16, "\"10", 16),
        (hexadecimal_17, "\"11", 17),
        (hexadecimal_18, "\"12", 18),
        (hexadecimal_19, "\"13", 19),
        (hexadecimal_20, "\"14", 20),
        (hexadecimal_21, "\"15", 21),
        (hexadecimal_22, "\"16", 22),
        (hexadecimal_23, "\"17", 23),
        (hexadecimal_24, "\"18", 24),
        (hexadecimal_25, "\"19", 25),
        (hexadecimal_26, "\"1A", 26),
        (hexadecimal_27, "\"1B", 27),
        (hexadecimal_28, "\"1C", 28),
        (hexadecimal_29, "\"1D", 29),
        (hexadecimal_30, "\"1E", 30),
        (hexadecimal_31, "\"1F", 31),
        (hexadecimal_513, "\"201", 513),
        (hexadecimal_max, "\"7FFFFFFF", 2147483647),
        (hexadecimal_min, "-\"7FFFFFFF", -2147483647),
        // Numbers taken from a character, introduced by a backtick.
        (number_from_character, "`A", 65),
        (number_from_length_1_control_sequence, r"`\A", 65),
        (number_from_character_non_ascii, "`ö", 0x00F6),
        (
            number_from_length_1_control_sequence_non_ascii,
            r"`\ö",
            0x00F6
        ),
        // Optional signs and spaces before the number.
        (signs_plus, r"+4", 4),
        (signs_minus, r"-4", -4),
        (signs_plus_minus, r"+-4", -4),
        (signs_minus_minus, r"--4", 4),
        (signs_minus_minus_spaces, r" - - 4", 4),
    ];

    // Test state whose category codes are the plain TeX defaults, except that '9' is a
    // letter. This lets the failure tests feed a digit character that is not a digit token.
    #[derive(Default)]
    struct State;

    impl TexlangState for State {
        fn cat_code(&self, c: char) -> types::CatCode {
            if c == '9' {
                return types::CatCode::Letter;
            }
            types::CatCode::PLAIN_TEX_DEFAULTS
                .get(c as usize)
                .copied()
                .unwrap_or_default()
        }
    }

    // Inputs for which parsing an `i32` reports an error. Where a third element is present
    // it is presumably the value produced after error recovery -- confirm against the macro.
    parse_failure_tests![
        i32,
        State,
        (number_with_letter_catcode, "9"),
        (octal_too_big, "'177777777770", i32::MAX),
        (octal_empty, "'"),
        (decimal_too_big_1, "2147483648", i32::MAX),
        (decimal_too_big_2, "500000000000000", i32::MAX),
        (decimal_too_negative_1, "-2147483648", -1 * i32::MAX),
        (decimal_too_negative_2, "-5000000000000", -1 * i32::MAX),
        (hexadecimal_too_big, "\"7FFFFFFF0", i32::MAX),
        (hexadecimal_empty, "\""),
        (character, "A"),
        (control_sequence_too_big, r"`\BC", '0' as i32),
    ];

    // Inputs that fall outside the range [0, 16) accepted by `Uint::<16>`.
    parse_failure_tests![
        Uint::<16>,
        State,
        (number_too_big, "16"),
        (number_is_negative, "-1"),
    ];
}