tfm/pl/
ast.rs

1//! Abstract syntax tree for property list files
2//!
3//! The property list [AST](Ast) is a fully typed representation of a property list file.
4
5use super::cst;
6use super::error::*;
7use crate::{ligkern::lang::PostLigOperation, Char, Face, FixWord, NamedParameter};
8use std::ops::Range;
9
/// Abstract syntax tree for property list files.
///
/// This is simply a list of [`Root`] nodes, one for each top-level element
/// of the property list that was successfully converted.
#[derive(Debug, PartialEq, Eq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct Ast(pub Vec<Root>);
16
17impl Ast {
18    /// Build an AST directly from source code.
19    pub fn from_pl_source_code(source: &str) -> (Ast, Vec<ParseWarning>) {
20        let (cst, mut errors) = cst::Cst::from_pl_source_code(source);
21        let ast = Ast::from_cst(cst, &mut errors);
22        (Ast(ast), errors)
23    }
24
25    /// Build an AST from a CST.
26    pub fn from_cst(cst: cst::Cst, errors: &mut Vec<ParseWarning>) -> Vec<Root> {
27        cst.0
28            .into_iter()
29            .filter_map(|c| Root::build(c, errors))
30            .collect()
31    }
32    /// Lower an AST to a CST.
33    pub fn lower(self, char_display_format: super::CharDisplayFormat) -> cst::Cst {
34        cst::Cst(
35            self.0
36                .into_iter()
37                .map(|c| Root::lower(c, char_display_format))
38                .collect(),
39        )
40    }
41}
42
/// Value of a leaf node in the AST that contains a single piece of data.
///
/// An example of this node is the `CHECKSUM` entry, which just contains a 32-bit checksum as data.
///
/// A value created through the `From<D>` implementation has the empty span `0..0`.
#[derive(PartialEq, Eq, Debug)]
pub struct SingleValue<D> {
    /// Data in this leaf node.
    pub data: D,
    /// Span of the data in the property list source code.
    pub data_span: Range<usize>,
    // TODO: open paren spans? key spans?
}
54
55impl<D> From<D> for SingleValue<D> {
56    fn from(data: D) -> Self {
57        Self {
58            data,
59            data_span: 0..0,
60        }
61    }
62}
63
#[cfg(feature = "arbitrary")]
impl<'a, D> arbitrary::Arbitrary<'a> for SingleValue<D>
where
    D: arbitrary::Arbitrary<'a>,
{
    /// Generate arbitrary data and wrap it via the `From<D>` implementation,
    /// so the span is always `0..0`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let data = D::arbitrary(u)?;
        Ok(Self::from(data))
    }
}
73
/// Value of a leaf node in the AST that contains two pieces of data.
///
/// An example of this node is the `HEADER` entry, which contains an 8-bit header index
/// and a 32-bit value.
///
/// A value created through the `From<(D, E)>` implementation has empty spans (`0..0`).
#[derive(PartialEq, Eq, Debug, Clone)]
pub struct TupleValue<D, E> {
    /// Left piece of data in the tuple.
    pub left: D,
    /// Span of the left data in the property list source code.
    pub left_span: Range<usize>,
    /// Right piece of data in the tuple.
    pub right: E,
    /// Span of the right data in the property list source code.
    pub right_span: Range<usize>,
}
89
90impl<D, E> From<(D, E)> for TupleValue<D, E> {
91    fn from(value: (D, E)) -> Self {
92        Self {
93            left: value.0,
94            left_span: 0..0,
95            right: value.1,
96            right_span: 0..0,
97        }
98    }
99}
100
#[cfg(feature = "arbitrary")]
impl<'a, D, E> arbitrary::Arbitrary<'a> for TupleValue<D, E>
where
    D: arbitrary::Arbitrary<'a>,
    E: arbitrary::Arbitrary<'a>,
{
    /// Generate an arbitrary `(D, E)` pair and convert it via `From<(D, E)>`,
    /// so both spans are always `0..0`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let pair = <(D, E)>::arbitrary(u)?;
        Ok(Self::from(pair))
    }
}
111
/// Value of a branch node in the AST.
///
/// A branch node contains a property list and optionally a piece of data.
/// For example, `CHARACTER` nodes specify a character in the data, and then
///     a property list of [`Character`] nodes.
///
/// A value created through the `From<(D, Vec<E>)>` implementation has the empty span `0..0`.
#[derive(PartialEq, Eq, Debug)]
pub struct Branch<D, E> {
    /// Data in this branch node.
    pub data: D,
    /// Span of the data in the property list source code.
    pub data_span: Range<usize>,
    // TODO: open paren spans? key spans?
    /// Elements of the property list.
    pub children: Vec<E>,
}
127
128impl<D, E> From<(D, Vec<E>)> for Branch<D, E> {
129    fn from(value: (D, Vec<E>)) -> Self {
130        Self {
131            data: value.0,
132            data_span: 0..0,
133            children: value.1,
134        }
135    }
136}
137
#[cfg(feature = "arbitrary")]
impl<'a, D, E> arbitrary::Arbitrary<'a> for Branch<D, E>
where
    D: arbitrary::Arbitrary<'a>,
    E: arbitrary::Arbitrary<'a>,
{
    /// Generate arbitrary data and children and convert them via `From<(D, Vec<E>)>`,
    /// so the data span is always `0..0`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let parts = <(D, Vec<E>)>::arbitrary(u)?;
        Ok(Self::from(parts))
    }
}
148
/// Conversion between a CST regular node and a typed AST value.
///
/// The first reason we have this trait is to make it possible to invoke the conversion
/// function of a type without specifying the type itself.
/// I.e., instead of writing `SingleValue::from_cst_node` we can write
/// `ToFromCstNode::from_cst_node`.
/// This makes the `node_impl!` macro simpler to implement because we don't need to provide
/// the type name to the macro.
///
/// Second, the implementations impose trait bounds on `D` and `E` in terms of private traits.
/// Having these kinds of bounds on a regular impl block is not allowed by the Rust compiler
///     (it raises the private trait in public interface error (error E0445)).
/// But with a trait impl we get away with it.
trait ToFromCstNode: Sized {
    /// Build the value from a CST node, pushing any warnings onto `errors`.
    /// Returns `None` if no value could be built.
    fn from_cst_node(p: cst::RegularNode, errors: &mut Vec<ParseWarning>) -> Option<Self>;
    /// Convert the value back to a CST node with the provided key.
    fn to_cst_node(self, key: &'static str, opts: &LowerOpts) -> cst::RegularNode;
}
163
/// Options that are threaded through the AST-to-CST lowering code.
struct LowerOpts {
    /// Format requested by the caller of the public `lower` methods;
    /// presumably controls how `Char` values are rendered (the rendering code is not in view here).
    char_display_format: super::CharDisplayFormat,
}
167
168impl<D: TryParse> ToFromCstNode for SingleValue<D> {
169    fn from_cst_node(p: cst::RegularNode, errors: &mut Vec<ParseWarning>) -> Option<Self> {
170        let (mut input, _) = Input::new(p, errors);
171        let result = D::try_parse(&mut input).map(|data| Self {
172            data: data.0,
173            data_span: data.1,
174        });
175        input.find_junk(false);
176        result
177    }
178    fn to_cst_node(self, key: &'static str, opts: &LowerOpts) -> cst::RegularNode {
179        cst::RegularNode {
180            opening_parenthesis_span: 0..0,
181            key: key.into(),
182            key_span: 0..0,
183            data: TryParse::to_string(self.data, opts),
184            data_span: self.data_span,
185            children: None,
186            closing_parenthesis_span: 0..0,
187        }
188    }
189}
190
191impl<D: TryParse, E: Parse> ToFromCstNode for TupleValue<D, E> {
192    fn from_cst_node(p: cst::RegularNode, errors: &mut Vec<ParseWarning>) -> Option<Self> {
193        let (mut input, _) = Input::new(p, errors);
194        let (left, left_span) = D::try_parse(&mut input)?;
195        let (right, right_span) = E::parse(&mut input);
196        input.find_junk(false);
197        Some(Self {
198            left,
199            left_span,
200            right,
201            right_span,
202        })
203    }
204    fn to_cst_node(self, key: &'static str, opts: &LowerOpts) -> cst::RegularNode {
205        cst::RegularNode {
206            opening_parenthesis_span: 0..0,
207            key: key.into(),
208            key_span: 0..0,
209            data: Some(format![
210                "{} {}",
211                TryParse::to_string(self.left, opts).expect("In TupleValue<D, E>, D != ()"),
212                Parse::to_string(self.right, opts).expect("In TupleValue<D, E>, E != ()")
213            ]),
214            data_span: self.left_span.start..self.right_span.end,
215            children: None,
216            closing_parenthesis_span: 0..0,
217        }
218    }
219}
220
221impl<D: Parse, E: Node> ToFromCstNode for Branch<D, E> {
222    fn from_cst_node(p: cst::RegularNode, errors: &mut Vec<ParseWarning>) -> Option<Self> {
223        {
224            let (mut input, children) = Input::new(p, errors);
225            let (data, data_span) = D::parse(&mut input);
226            input.find_junk(true);
227            Some(Branch::<D, E> {
228                data,
229                data_span,
230                children: children
231                    .into_iter()
232                    .filter_map(|c| Node::build(c, errors))
233                    .collect(),
234            })
235        }
236    }
237    fn to_cst_node(self, key: &'static str, opts: &LowerOpts) -> cst::RegularNode {
238        cst::RegularNode {
239            opening_parenthesis_span: 0..0,
240            key: key.into(),
241            key_span: 0..0,
242            data: TryParse::to_string(self.data, opts),
243            data_span: self.data_span,
244            children: Some(
245                self.children
246                    .into_iter()
247                    .map(|c| E::lower(c, opts))
248                    .collect(),
249            ),
250            closing_parenthesis_span: 0..0,
251        }
252    }
253}
254
255trait Node: Sized {
256    fn build_regular(p: cst::RegularNode, errors: &mut Vec<ParseWarning>) -> Option<Self>;
257    fn build_comment(s: String) -> Self;
258    fn build(n: cst::Node, errors: &mut Vec<ParseWarning>) -> Option<Self> {
259        match n {
260            cst::Node::Comment(c) => Some(Node::build_comment(c)),
261            cst::Node::Regular(r) => Node::build_regular(r, errors),
262        }
263    }
264    fn lower(self, opts: &LowerOpts) -> cst::Node;
265}
266
// Generates the boilerplate for an AST node enum.
//
// Each entry has the form `(CONST_NAME, "PROPERTY", Variant)` or
// `(CONST_NAME, "PROPERTY", Variant, prefix::Path)`. In the second form the
// prefix is the first field of the variant and the parsed value is the second.
//
// The macro generates:
// - a string constant per entry plus `ALL_PROPERTY_NAMES`;
// - the `Node` implementation (the enum must have a `Comment(String)` variant);
// - a public `lower` method taking a `CharDisplayFormat`.
macro_rules! node_impl {
    ( $type: ident, $( ($key: ident, $str: expr, $variant: ident $(, $prefix: path )? ), )+ ) => {

        impl $type {
            $(
                pub const $key: &'static str = $str;
            )+

            pub const ALL_PROPERTY_NAMES: &'static [&'static str] = &[$( $str, )+];
        }

        impl Node for $type {
            fn build_regular(mut r: cst::RegularNode, errors: &mut Vec<ParseWarning>) -> Option<Self> {
                // Property names are matched case-insensitively (ASCII only).
                r.key.make_ascii_uppercase();
                match r.key.as_str() {
                    $(
                        $type::$key => {
                            // The concrete value type is inferred from the variant's field.
                            ToFromCstNode::from_cst_node(r, errors)
                                .map(|v| $type::$variant( $( $prefix, )? v ))
                        },
                    )+
                    _ => {
                        let warning = ParseWarning {
                            span: r.key_span.clone(),
                            knuth_pltotf_offset: Some(r.key_span.end),
                            kind: ParseWarningKind::InvalidPropertyName {
                                provided_name: r.key.into(),
                                allowed_property_names: $type::ALL_PROPERTY_NAMES,
                            }
                        };
                        errors.push(warning);
                        None
                    },
                }
            }

            fn build_comment(s: String) -> Self {
                $type::Comment(s)
            }

            fn lower(self, opts: &LowerOpts) -> cst::Node {
                match self {
                    $(
                        $type::$variant($( $prefix, )? v) => {
                            cst::Node::Regular(v.to_cst_node($str, opts))
                        }
                    )+
                    $type::Comment(text) => cst::Node::Comment(text),
                }
            }
        }

        impl $type {
            pub fn lower(self, char_display_format: super::CharDisplayFormat) -> cst::Node {
                Node::lower(self, &LowerOpts { char_display_format })
            }
        }
    };
}
329
/// A root node in a property list file.
///
/// The documentation on each variant is based on the documentation in PLtoTF.2014.9.
#[derive(PartialEq, Eq, Debug)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum Root {
    /// The checksum value is used to identify a particular version of a font;
    ///     it should match the check sum value stored with the font itself.
    /// An explicit check sum of zero is used to bypass check sum testing.
    /// If no checksum is specified in the PL file,
    ///     PLtoTF will compute the checksum that METAFONT would compute from the same data.
    Checksum(SingleValue<u32>),

    /// The design size, which should be in the range `[1.0, 2048)`,
    ///     represents the default amount by which all quantities will be scaled
    ///     if the font is not loaded with an `at` specification.
    /// For example, if one says `\font\A=cmr10 at 15pt` in TeX language,
    ///     the design size in the TFM file is ignored and effectively replaced by 15 points;
    ///     but if one simply says `\font\A=cmr10` the stated design size is used.
    /// This quantity is always in units of printer's points.
    DesignSize(SingleValue<DesignSize>),

    /// The design units specifies how many units equals the design size
    ///     (or the eventual ‘at’ size, if the font is being scaled).
    /// For example, suppose
    /// you have a font that has been digitized with 600 pixels per em,
    ///     and the design size is one em;
    ///     then you could say `(DESIGNUNITS R 600)` if you wanted to give all of your measurements in units of pixels.
    DesignUnits(SingleValue<FixWord>),

    /// A string that identifies the correspondence between the numeric codes and font characters.
    /// Its length must be less than 40.
    /// (TeX ignores this information, but other software programs make use of it.)
    CodingScheme(SingleValue<String>),

    /// A string that identifies the name of the family to which this font belongs, e.g., ‘HELVETICA’.
    /// Its length must be less than 20.
    /// (TeX ignores this information; but it is needed, for example, when converting DVI files to PRESS files
    /// for Xerox equipment.)
    Family(SingleValue<String>),

    /// This value is a subsidiary identification of the font within its family.
    /// For example, bold italic condensed fonts might have the same family
    /// name as light roman extended fonts, differing only in their face byte.
    /// (TeX ignores this information;
    ///     but it is needed, for example, when converting DVI files to PRESS files for Xerox equipment.)
    Face(SingleValue<Face>),

    /// If true,
    ///     character codes less than 128 cannot lead to codes of 128 or more via ligatures or
    ///     char lists or extensible characters.
    /// (TeX82 ignores this flag, but older versions of TeX would only accept TFM files that were seven-bit safe.)
    /// PLtoTF computes the correct value of this flag and gives an
    ///     error message only if a claimed "true" value is incorrect.
    SevenBitSafeFlag(SingleValue<bool>),

    /// Value of a header.
    /// The one-byte value should be at least 18.
    /// The four-byte value goes into the header word whose index is the one-byte value;
    ///     for example, to set `header[18]=1`, one may write `(HEADER D 18 O 1)`.
    /// This notation is used for header information that is presently unnamed.
    /// (TeX ignores it.)
    Header(TupleValue<DecimalU8, u32>),

    /// Font dimensions property list.
    FontDimension(Branch<(), FontDimension>),

    /// A lig table.
    LigTable(Branch<(), LigTable>),

    /// If the boundary character appears in a lig table command (`LIGTABLE`),
    ///     it matches "end of word" as well as itself.
    /// If no boundary character is given and no `LABEL BOUNDARYCHAR` occurs within a lig table,
    ///     word boundaries will not affect ligatures or kerning.
    BoundaryChar(SingleValue<Char>),

    /// Metrics for a character in the font.
    /// The value specifies the character and
    ///     the property list of [`Character`] nodes specifies metrics for the character.
    Character(Branch<Char, Character>),

    /// A comment that is ignored.
    Comment(String),
}
414
// Property names accepted at the top level of a property list file.
// Each entry is `(constant name, property name, enum variant)`.
node_impl!(
    Root,
    (CHECKSUM, "CHECKSUM", Checksum),
    (DESIGN_SIZE, "DESIGNSIZE", DesignSize),
    (DESIGN_UNITS, "DESIGNUNITS", DesignUnits),
    (CODING_SCHEME, "CODINGSCHEME", CodingScheme),
    (FAMILY, "FAMILY", Family),
    (FACE, "FACE", Face),
    (SEVEN_BIT_SAFE_FLAG, "SEVENBITSAFEFLAG", SevenBitSafeFlag),
    (HEADER, "HEADER", Header),
    (FONT_DIMENSION, "FONTDIMEN", FontDimension),
    (LIG_TABLE, "LIGTABLE", LigTable),
    (BOUNDARY_CHAR, "BOUNDARYCHAR", BoundaryChar),
    (CHARACTER, "CHARACTER", Character),
);
430
/// An element of a `FONTDIMEN` property list.
///
/// The property names allowed in a `FONTDIMEN` property list correspond to various TeX parameters,
///     each of which has a (real) numeric value.
/// All of the parameters except `SLANT` are in design units.
///
/// The documentation on each variant is based on the documentation in PLtoTF.2014.11.
#[derive(PartialEq, Eq, Debug)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum FontDimension {
    /// A named parameter like `(SLANT R -.25)`.
    NamedParam(NamedParameter, SingleValue<FixWord>),

    /// The notation `PARAMETER n` provides another way to specify the nth parameter;
    ///     for example, `(PARAMETER D 1 R −.25)` is another way to specify that the `SLANT` is −0.25.
    /// The value of n must be strictly positive and less than max param words.
    IndexedParam(TupleValue<ParameterNumber, FixWord>),

    /// A comment that is ignored.
    Comment(String),
}
452
/// A [`u8`] that is output in decimal (as `D <value>`) when lowering the AST to a CST.
#[derive(Debug, PartialEq, Eq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct DecimalU8(pub u8);
457
458impl Parse for DecimalU8 {
459    fn parse(input: &mut Input) -> (Self, Range<usize>) {
460        let (a, b) = u8::parse(input);
461        (DecimalU8(a), b)
462    }
463
464    fn to_string(self, _: &LowerOpts) -> Option<String> {
465        Some(format!["D {}", self.0])
466    }
467}
468
/// The number of a parameter in a `PARAMETER` element of a `FONTDIMEN` property list.
///
/// When parsed from a property list the value is read as an 8-bit integer,
/// and the values 0 and 255 are rejected with a warning.
/// The number is output in decimal when lowering the AST to a CST.
#[derive(PartialEq, Eq, Debug)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub struct ParameterNumber(pub u16);
472
473impl TryParse for ParameterNumber {
474    fn try_parse(input: &mut Input) -> Option<(Self, Range<usize>)> {
475        input.consume_spaces();
476        let span_start = input.raw_data_span.start;
477        let (a, err_or) = match parse_u8(input) {
478            Ok(u) => (u, None),
479            Err(err) => {
480                input.skip_error(err.clone().into());
481                (0, Some(err))
482            }
483        };
484        let b = span_start..input.raw_data_span.start;
485        match a {
486            0 => {
487                input.skip_error(ParseWarning {
488                    span: b.clone(),
489                    knuth_pltotf_offset: Some(match err_or {
490                        // After this error occurs pltotf calls `backup` which shifts
491                        // the scanner back 1 character.
492                        Some(ParseU8Error::SmallIntegerIsTooBig { .. }) => b.end - 1,
493                        _ => b.end,
494                    }),
495                    kind: ParseWarningKind::ParameterNumberIsZero,
496                });
497                None
498            }
499            u8::MAX => {
500                input.skip_error(ParseWarning {
501                    span: b.clone(),
502                    knuth_pltotf_offset: Some(b.end),
503                    kind: ParseWarningKind::ParameterNumberIsTooBig,
504                });
505                None
506            }
507            n => Some((ParameterNumber(n as u16), b)),
508        }
509    }
510
511    fn to_string(self, _: &LowerOpts) -> Option<String> {
512        Some(format!["D {}", self.0])
513    }
514}
515
// Property names accepted inside a `FONTDIMEN` property list.
// Named parameters are entries of the form
// `(constant name, property name, NamedParam, parameter)`;
// the final entry handles the indexed `PARAMETER` form.
node_impl!(
    FontDimension,
    (SLANT, "SLANT", NamedParam, NamedParameter::Slant),
    (SPACE, "SPACE", NamedParam, NamedParameter::Space),
    (STRETCH, "STRETCH", NamedParam, NamedParameter::Stretch),
    (SHRINK, "SHRINK", NamedParam, NamedParameter::Shrink),
    (X_HEIGHT, "XHEIGHT", NamedParam, NamedParameter::XHeight),
    (QUAD, "QUAD", NamedParam, NamedParameter::Quad),
    (
        EXTRA_SPACE,
        "EXTRASPACE",
        NamedParam,
        NamedParameter::ExtraSpace
    ),
    (NUM_1, "NUM1", NamedParam, NamedParameter::Num1),
    (NUM_2, "NUM2", NamedParam, NamedParameter::Num2),
    (NUM_3, "NUM3", NamedParam, NamedParameter::Num3),
    (DENOM_1, "DENOM1", NamedParam, NamedParameter::Denom1),
    (DENOM_2, "DENOM2", NamedParam, NamedParameter::Denom2),
    (SUP_1, "SUP1", NamedParam, NamedParameter::Sup1),
    (SUP_2, "SUP2", NamedParam, NamedParameter::Sup2),
    (SUP_3, "SUP3", NamedParam, NamedParameter::Sup3),
    (SUB_1, "SUB1", NamedParam, NamedParameter::Sub1),
    (SUB_2, "SUB2", NamedParam, NamedParameter::Sub2),
    (SUP_DROP, "SUPDROP", NamedParam, NamedParameter::SupDrop),
    (SUB_DROP, "SUBDROP", NamedParam, NamedParameter::SubDrop),
    (DELIM_1, "DELIM1", NamedParam, NamedParameter::Delim1),
    (DELIM_2, "DELIM2", NamedParam, NamedParameter::Delim2),
    (
        AXIS_HEIGHT,
        "AXISHEIGHT",
        NamedParam,
        NamedParameter::AxisHeight
    ),
    (
        DEFAULT_RULE_THICKNESS,
        "DEFAULTRULETHICKNESS",
        NamedParam,
        NamedParameter::DefaultRuleThickness
    ),
    (
        BIG_OP_SPACING_1,
        "BIGOPSPACING1",
        NamedParam,
        NamedParameter::BigOpSpacing1
    ),
    (
        BIG_OP_SPACING_2,
        "BIGOPSPACING2",
        NamedParam,
        NamedParameter::BigOpSpacing2
    ),
    (
        BIG_OP_SPACING_3,
        "BIGOPSPACING3",
        NamedParam,
        NamedParameter::BigOpSpacing3
    ),
    (
        BIG_OP_SPACING_4,
        "BIGOPSPACING4",
        NamedParam,
        NamedParameter::BigOpSpacing4
    ),
    (
        BIG_OP_SPACING_5,
        "BIGOPSPACING5",
        NamedParam,
        NamedParameter::BigOpSpacing5
    ),
    (PARAMETER, "PARAMETER", IndexedParam),
);
588
/// An element of a `CHARACTER` property list.
///
/// The documentation on each variant is based on the documentation in PLtoTF.2014.12.
#[derive(PartialEq, Eq, Debug)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum Character {
    /// The character's width in design units.
    // NOTE(review): unlike the other dimensions, the width is optional; the
    // meaning of `None` is defined by the `Option<FixWord>` parser, which is not in view here.
    Width(SingleValue<Option<FixWord>>),

    /// The character's height in design units.
    Height(SingleValue<FixWord>),

    /// The character's depth in design units.
    Depth(SingleValue<FixWord>),

    /// The character's italic correction in design units.
    ItalicCorrection(SingleValue<FixWord>),

    /// Specifies the character that follows the present one in a "charlist."
    /// The value must be the number of a character in the font,
    ///     and there must be no infinite cycles of supposedly larger and larger characters.
    NextLarger(SingleValue<Char>),

    /// Specifies an extensible character.
    /// This option and `NEXTLARGER` are mutually exclusive;
    ///     i.e., they cannot both be used within the same `CHARACTER` list.
    ExtensibleCharacter(Branch<(), ExtensibleCharacter>),

    /// A comment that is ignored.
    Comment(String),
}
620
// Property names accepted inside a `CHARACTER` property list.
// Each entry is `(constant name, property name, enum variant)`.
node_impl!(
    Character,
    (WIDTH, "CHARWD", Width),
    (HEIGHT, "CHARHT", Height),
    (DEPTH, "CHARDP", Depth),
    (ITALIC_CORRECTION, "CHARIC", ItalicCorrection),
    (NEXT_LARGER, "NEXTLARGER", NextLarger),
    (EXTENSIBLE_CHARACTER, "VARCHAR", ExtensibleCharacter),
);
630
/// An element of a `VARCHAR` property list.
///
/// The documentation on each variant is based on the documentation in PLtoTF.2014.12.
#[derive(PartialEq, Eq, Debug)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum ExtensibleCharacter {
    /// The top piece of an extensible character, or 0 if the top piece is absent.
    Top(SingleValue<Char>),

    /// The middle piece of an extensible character, or 0 if the middle piece is absent.
    Middle(SingleValue<Char>),

    /// The bottom piece of an extensible character, or 0 if the bottom piece is absent.
    Bottom(SingleValue<Char>),

    /// The replicated piece of an extensible character, or 0 if it is absent.
    Replicated(SingleValue<Char>),

    /// A comment that is ignored.
    Comment(String),
}
652
// Property names accepted inside a `VARCHAR` property list.
// Each entry is `(constant name, property name, enum variant)`.
node_impl!(
    ExtensibleCharacter,
    (TOP, "TOP", Top),
    (MIDDLE, "MID", Middle),
    (BOTTOM, "BOT", Bottom),
    (REPLICATED, "REP", Replicated),
);
660
/// An element of a `LIGTABLE` property list.
///
/// A lig table property list contains elements of four kinds,
///     specifying a program in a simple command language that TeX uses for ligatures and kerns.
/// If several lig table lists appear, they are effectively concatenated into a single list.
///
/// The documentation here and on each variant is based on the documentation in PLtoTF.2014.13.
#[derive(PartialEq, Eq, Debug)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum LigTable {
    /// A label specifies that the program for the stated character value starts here.
    /// The integer must be the number of a character in the font;
    ///     its `CHARACTER` property list must not have a `NEXTLARGER` or `VARCHAR` field.
    /// At least one `LIG` or `KRN` step must follow.
    ///
    /// `LABEL BOUNDARYCHAR` means that the program for beginning-of-word ligatures starts here.
    Label(SingleValue<LigTableLabel>),

    /// The instruction `(LIG c r)` means,
    ///     "If the next character is c,
    ///     then insert character r and possibly delete the current character and/or c;
    ///     otherwise go on to the next instruction."
    /// Characters r and c must be present in the font.
    ///
    /// The `LIG` keyword may be immediately preceded or followed by a slash,
    ///     and then immediately followed by > characters not exceeding the number of slashes.
    /// Thus there are eight possible forms:
    ///
    /// | keyword   | retain  | move to  | [`PostLigOperation`] value |
    /// |-----------|---------|----------|--------|
    /// | `LIG`     | neither | inserted | [`PostLigOperation::RetainNeitherMoveToInserted`] |
    /// | `/LIG`    | left    | left     | [`PostLigOperation::RetainLeftMoveNowhere`]       |
    /// | `/LIG>`   | left    | inserted | [`PostLigOperation::RetainLeftMoveToInserted`]    |
    /// | `LIG/`    | right   | inserted | [`PostLigOperation::RetainRightMoveToInserted`]   |
    /// | `LIG/>`   | right   | right    | [`PostLigOperation::RetainRightMoveToRight`]      |
    /// | `/LIG/`   | both    | left     | [`PostLigOperation::RetainBothMoveNowhere`]       |
    /// | `/LIG/>`  | both    | inserted | [`PostLigOperation::RetainBothMoveToInserted`]    |
    /// | `/LIG/>>` | both    | right    | [`PostLigOperation::RetainBothMoveToRight`]       |
    ///
    /// The slashes specify retention of the left or right original character; the > signs specify passing over
    /// the result without further ligature processing.
    Lig(PostLigOperation, TupleValue<Char, Char>),

    /// A kern instruction `(KRN c r)` means,
    ///     "If the next character is c, then insert a blank space of width r between the current character and c;
    ///     otherwise go on to the next instruction."
    /// The value of r, which is in design units, is often negative.
    /// Character code c must exist in the font.
    Kern(TupleValue<Char, FixWord>),

    /// A stop instruction ends a ligature/kern program.
    /// It must follow either a `LIG` or `KRN` instruction, not a `LABEL` or `STOP` or `SKIP`.
    Stop(SingleValue<()>),

    /// A skip instruction specifies continuation of a ligature/kern program after
    /// the specified number of LIG or KRN steps has been skipped over.
    /// The number of subsequent LIG and KRN instructions must therefore exceed this specified amount.
    Skip(SingleValue<DecimalU8>),

    /// A comment that is ignored.
    Comment(String),
}
723
/// Value of a label in a lig table.
///
/// In a property list this is either a character or the keyword `BOUNDARYCHAR`.
#[derive(PartialEq, Eq, Debug)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum LigTableLabel {
    /// A specific character.
    Char(Char),
    /// The boundary character.
    BoundaryChar,
}
733
734impl Parse for LigTableLabel {
735    fn parse(input: &mut Input) -> (Self, Range<usize>) {
736        match input.peek() {
737            Some('B' | 'b') => {
738                let span_start = input.raw_data_offset;
739                input.skip_to_end();
740                (LigTableLabel::BoundaryChar, span_start..span_start + 1)
741            }
742            _ => {
743                let (c, span) = Parse::parse(input);
744                (LigTableLabel::Char(c), span)
745            }
746        }
747    }
748    fn to_string(self, opts: &LowerOpts) -> Option<String> {
749        match self {
750            LigTableLabel::Char(c) => Parse::to_string(c, opts),
751            LigTableLabel::BoundaryChar => Some("BOUNDARYCHAR".into()),
752        }
753    }
754}
755
// Property names accepted inside a `LIGTABLE` property list.
// The eight `LIG` forms all map to the `Lig` variant and differ only in the
// `PostLigOperation` stored as the variant's first field.
node_impl!(
    LigTable,
    (LABEL, "LABEL", Label),
    (
        LIG_1,
        "LIG",
        Lig,
        PostLigOperation::RetainNeitherMoveToInserted
    ),
    (LIG_2, "/LIG", Lig, PostLigOperation::RetainLeftMoveNowhere),
    (
        LIG_3,
        "/LIG>",
        Lig,
        PostLigOperation::RetainLeftMoveToInserted
    ),
    (
        LIG_4,
        "LIG/",
        Lig,
        PostLigOperation::RetainRightMoveToInserted
    ),
    (
        LIG_5,
        "LIG/>",
        Lig,
        PostLigOperation::RetainRightMoveToRight
    ),
    (LIG_6, "/LIG/", Lig, PostLigOperation::RetainBothMoveNowhere),
    (
        LIG_7,
        "/LIG/>",
        Lig,
        PostLigOperation::RetainBothMoveToInserted
    ),
    (
        LIG_8,
        "/LIG/>>",
        Lig,
        PostLigOperation::RetainBothMoveToRight
    ),
    (KERN, "KRN", Kern),
    (STOP, "STOP", Stop),
    (SKIP, "SKIP", Skip),
);
801
/// A data type that can always be read from the data of a node.
///
/// Parsing cannot fail outright: `parse` always returns a value.
/// Every type implementing this trait also implements [`TryParse`] through a blanket implementation.
trait Parse: Sized {
    /// Parse a value from the input, returning it together with its span.
    fn parse(input: &mut Input) -> (Self, Range<usize>);
    /// String form of the value used when lowering to a CST, if it has one.
    fn to_string(self, opts: &LowerOpts) -> Option<String>;
}
806
/// A data type that can be read from the data of a node, where reading may fail.
trait TryParse: Sized {
    /// Try to parse a value from the input, returning it together with its span,
    /// or `None` if no value could be obtained.
    fn try_parse(input: &mut Input) -> Option<(Self, Range<usize>)>;
    /// String form of the value used when lowering to a CST, if it has one.
    fn to_string(self, opts: &LowerOpts) -> Option<String>;
}
811
812impl<T: Parse> TryParse for T {
813    fn try_parse(input: &mut Input) -> Option<(Self, Range<usize>)> {
814        Some(Parse::parse(input))
815    }
816    fn to_string(self, opts: &LowerOpts) -> Option<String> {
817        Parse::to_string(self, opts)
818    }
819}
820
/// Cursor over the data of a single CST node, used while parsing the data into typed values.
#[derive(Debug)]
struct Input<'a> {
    /// The data string of the node (empty if the node had no data).
    raw_data: String,
    /// Index into `raw_data` of the next character that has not been consumed.
    raw_data_offset: usize,
    /// Span in the source code of the data that has not been consumed;
    /// the start moves forward as data is consumed.
    raw_data_span: Range<usize>,
    /// Destination for warnings generated while parsing.
    errors: &'a mut Vec<ParseWarning>,
}
828
829impl<'a> Input<'a> {
830    fn new(p: cst::RegularNode, errors: &'a mut Vec<ParseWarning>) -> (Self, Vec<cst::Node>) {
831        (
832            Input {
833                raw_data: p.data.unwrap_or_default(),
834                raw_data_offset: 0,
835                errors,
836                raw_data_span: p.data_span,
837            },
838            p.children.unwrap_or_default(),
839        )
840    }
841    fn skip_error(&mut self, error: ParseWarning) {
842        self.errors.push(error);
843        self.skip_to_end();
844    }
845    fn skip_to_end(&mut self) {
846        while self.next().is_some() {}
847    }
848    fn peek(&self) -> Option<char> {
849        self.raw_data[self.raw_data_offset..].chars().next()
850    }
851    fn consume_spaces(&mut self) {
852        while self.raw_data[self.raw_data_offset..].starts_with([' ', '\n']) {
853            self.raw_data_offset += 1;
854            self.raw_data_span.start += 1;
855        }
856    }
857    fn last_span(&self, last: Option<char>) -> Range<usize> {
858        let end_span = self.raw_data_span.start;
859        match last {
860            None => end_span..end_span,
861            Some(_) => end_span - 1..end_span,
862        }
863    }
864    fn take_string(&mut self) -> String {
865        assert_eq!(self.raw_data_offset, 0);
866        let mut res = String::new();
867        std::mem::swap(&mut res, &mut self.raw_data);
868        self.raw_data_span.start = self.raw_data_span.end;
869        res
870    }
871    fn find_junk(&mut self, is_branch: bool) {
872        self.consume_spaces();
873        if !self.raw_data_span.is_empty() {
874            let span = self.raw_data_span.clone();
875            let knuth_pltotf_offset = Some(self.raw_data_span.start + 1);
876            let junk: String = self.collect();
877            self.errors.push(ParseWarning {
878                span,
879                knuth_pltotf_offset,
880                kind: if is_branch {
881                    ParseWarningKind::JunkInsidePropertyList { junk }
882                } else {
883                    ParseWarningKind::JunkAfterPropertyValue { junk }
884                },
885            })
886        }
887    }
888}
889
890impl<'a> Iterator for Input<'a> {
891    type Item = char;
892    fn next(&mut self) -> Option<Self::Item> {
893        let res = self.raw_data[self.raw_data_offset..].chars().next();
894        if let Some(c) = res {
895            if !c.is_ascii_graphic() && c != ' ' {
896                self.errors.push(ParseWarning {
897                    span: self.raw_data_span.start..self.raw_data_span.start + 1,
898                    knuth_pltotf_offset: Some(self.raw_data_span.start + 1),
899                    kind: ParseWarningKind::NonVisibleAsciiCharacter { character: c },
900                });
901            }
902            self.raw_data_offset += c.len_utf8();
903            self.raw_data_span.start += 1;
904        }
905        res
906    }
907}
908
909impl Parse for () {
910    fn parse(input: &mut Input) -> (Self, Range<usize>) {
911        ((), input.raw_data_span.start..input.raw_data_span.start)
912    }
913    fn to_string(self, _: &LowerOpts) -> Option<String> {
914        None
915    }
916}
917
918impl Parse for u32 {
919    // PLtoTF.2014.59-60
920    fn parse(input: &mut Input) -> (Self, Range<usize>) {
921        input.consume_spaces();
922        let start_span = input.raw_data_span.start;
923        let radix = match input.next() {
924            Some('O' | 'o') => 8_u8,
925            Some('H' | 'h') => 16,
926            c => {
927                let span = input.last_span(c);
928                input.skip_error(ParseWarning {
929                    span: span.clone(),
930                    knuth_pltotf_offset: Some(span.end),
931                    kind: ParseWarningKind::InvalidPrefixForInteger { prefix: c },
932                });
933                return (0, span);
934            }
935        };
936        input.consume_spaces();
937        let number_start_span = input.raw_data_span.start;
938        let mut acc: u32 = 0;
939        while let Some(c) = input.peek() {
940            let n: u32 = match c.to_digit(16) {
941                None => break,
942                Some(d) => d,
943            };
944            input.next();
945            if n >= radix as u32 {
946                let span_start = input.raw_data_span.start - 1;
947                input.skip_error(ParseWarning {
948                    span: span_start..span_start + 1,
949                    knuth_pltotf_offset: Some(span_start + 1),
950                    kind: ParseWarningKind::InvalidOctalDigit { invalid_digit: c },
951                });
952                break;
953            }
954            match acc
955                .checked_mul(radix as u32)
956                .and_then(|acc| acc.checked_add(n))
957            {
958                None => {
959                    // Overflow has occurred.
960                    // We advance to the end of the integer constant so that the span in the error is
961                    // most accurate.
962                    let knuth_pltotf_offset = input.raw_data_span.start;
963                    while let Some(c) = input.peek() {
964                        match c.to_digit(radix as u32) {
965                            None => break,
966                            Some(_) => input.next(),
967                        };
968                    }
969                    let end_span = input.raw_data_span.start;
970                    input.skip_error(ParseWarning {
971                        span: number_start_span..end_span,
972                        knuth_pltotf_offset: Some(knuth_pltotf_offset),
973                        kind: ParseWarningKind::IntegerIsTooBig { radix },
974                    });
975                    break;
976                }
977                Some(new_acc) => acc = new_acc,
978            }
979        }
980        (acc, start_span..input.raw_data_span.start)
981    }
982    fn to_string(self, _: &LowerOpts) -> Option<String> {
983        Some(format!("O {self:o}"))
984    }
985}
986
987impl Parse for u8 {
988    fn parse(input: &mut Input) -> (Self, Range<usize>) {
989        input.consume_spaces();
990        let span_start = input.raw_data_span.start;
991        let u = match parse_u8(input) {
992            Ok(u) => u,
993            Err(err) => {
994                input.skip_error(err.into());
995                0
996            }
997        };
998        let span_end = input.raw_data_span.start;
999        (u, span_start..span_end)
1000    }
1001    fn to_string(self, _: &LowerOpts) -> Option<String> {
1002        Some(format!["O {self:o}"])
1003    }
1004}
1005
1006impl Parse for Char {
1007    fn parse(input: &mut Input) -> (Self, Range<usize>) {
1008        let (u, span) = u8::parse(input);
1009        (Char(u), span)
1010    }
1011    fn to_string(self, opts: &LowerOpts) -> Option<String> {
1012        // TFtoPL.2014.38 and my interpretation of `man tftopl`
1013        // Note that the Pascal code is changed as part of compiling web2c, and these
1014        // changes influence the behavior here.
1015        use super::CharDisplayFormat;
1016        let output_as_ascii = match (opts.char_display_format, self.0 as char) {
1017            (CharDisplayFormat::Default, 'a'..='z' | 'A'..='Z' | '0'..='9') => true,
1018            (CharDisplayFormat::Ascii, '(' | ')') => false,
1019            (CharDisplayFormat::Ascii, '!'..='~') => true,
1020            _ => false,
1021        };
1022        if output_as_ascii {
1023            Some(format!("C {}", self.0 as char))
1024        } else {
1025            Parse::to_string(self.0, opts)
1026        }
1027    }
1028}
1029
1030impl Parse for String {
1031    fn parse(input: &mut Input) -> (Self, Range<usize>) {
1032        let span_start = input.raw_data_span.start;
1033        let s = input.take_string();
1034        let l = s.len();
1035        (s, span_start..span_start + l)
1036    }
1037    fn to_string(self, _: &LowerOpts) -> Option<String> {
1038        Some(self)
1039    }
1040}
1041
1042impl Parse for Face {
1043    fn parse(input: &mut Input) -> (Self, Range<usize>) {
1044        let (u, span) = u8::parse(input);
1045        (u.into(), span)
1046    }
1047    fn to_string(self, opts: &LowerOpts) -> Option<String> {
1048        // TFtoPL.2014.39
1049        match self {
1050            Face::Valid(w, s, e) => Some(format!(
1051                "F {}{}{}",
1052                match w {
1053                    crate::FaceWeight::Light => 'L',
1054                    crate::FaceWeight::Medium => 'M',
1055                    crate::FaceWeight::Bold => 'B',
1056                },
1057                match s {
1058                    crate::FaceSlope::Roman => 'R',
1059                    crate::FaceSlope::Italic => 'I',
1060                },
1061                match e {
1062                    crate::FaceExpansion::Regular => 'R',
1063                    crate::FaceExpansion::Condensed => 'C',
1064                    crate::FaceExpansion::Extended => 'E',
1065                },
1066            )),
1067            Face::Other(u) => Parse::to_string(u, opts),
1068        }
1069    }
1070}
1071
1072impl TryParse for bool {
1073    fn try_parse(input: &mut Input) -> Option<(Self, Range<usize>)> {
1074        // PLtoTF.2014.90
1075        let span_start = input.raw_data_span.start;
1076        let b = match input.next() {
1077            Some('T' | 't') => true,
1078            Some('F' | 'f') => false,
1079            _ => {
1080                input.skip_to_end();
1081                let span_end = input.raw_data_span.start;
1082                input.skip_error(ParseWarning {
1083                    span: span_start..span_end,
1084                    knuth_pltotf_offset: Some(span_start + 1),
1085                    kind: ParseWarningKind::InvalidBoolean,
1086                });
1087                return None;
1088            }
1089        };
1090        let span_end = input.raw_data_span.start;
1091        input.skip_to_end();
1092        Some((b, span_start..span_end))
1093    }
1094    fn to_string(self, _: &LowerOpts) -> Option<String> {
1095        Some(if self { "TRUE" } else { "FALSE" }.into())
1096    }
1097}
1098
1099impl Parse for FixWord {
1100    // PLtoTF.2014.62
1101    fn parse(input: &mut Input) -> (Self, Range<usize>) {
1102        input.consume_spaces();
1103        let span_start = input.raw_data_span.start;
1104        match input.next() {
1105            Some('D' | 'd') | Some('R' | 'r') => (),
1106            c => {
1107                let span = input.last_span(c);
1108                input.skip_error(ParseWarning {
1109                    span: span.clone(),
1110                    knuth_pltotf_offset: Some(span.end),
1111                    kind: ParseWarningKind::InvalidPrefixForDecimalNumber,
1112                });
1113                return (FixWord::ZERO, span);
1114            }
1115        }
1116        input.consume_spaces();
1117        let number_span_start = input.raw_data_span.start;
1118
1119        // PLtoTF.2014.63
1120        let negative = {
1121            let mut negative = false;
1122            loop {
1123                match input.peek() {
1124                    Some('+' | ' ') => (),
1125                    Some('-') => {
1126                        negative = !negative;
1127                    }
1128                    _ => break,
1129                };
1130                input.next();
1131            }
1132            negative
1133        };
1134
1135        let integer_part = {
1136            let mut acc = 0_i32;
1137            while let Some(d) = input.peek().and_then(|c| c.to_digit(10)) {
1138                input.next();
1139                // PLtoTF.2014.64
1140                // The arithmetic here is guaranteed to succeed because we impose acc <= 2048
1141                acc = acc.checked_mul(10).unwrap().checked_add(d as i32).unwrap();
1142                if acc >= 2048 {
1143                    // We set the accumulator to 2048 and keep going. This allows us to capture
1144                    // the full span for the number in the error.
1145                    acc = 2048;
1146                }
1147            }
1148            acc
1149        };
1150
1151        let fractional_part = {
1152            let mut acc = 0_i32;
1153            if input.peek() == Some('.') {
1154                input.next();
1155                // PLtoTF.2014.66
1156                let mut fractional_digits = [0_i32; 7];
1157                for slot in &mut fractional_digits {
1158                    match input.peek().and_then(|c| c.to_digit(10)) {
1159                        Some(d) => {
1160                            input.next();
1161                            *slot = 0o10000000_i32.checked_mul(d as i32).unwrap();
1162                        }
1163                        None => break,
1164                    }
1165                }
1166                for j in (0..7).rev() {
1167                    acc = fractional_digits[j].checked_add(acc / 10).unwrap();
1168                }
1169                acc = (acc + 10) / 20;
1170            }
1171            acc
1172        };
1173
1174        if integer_part >= 2048 || (fractional_part >= FixWord::ONE.0 && integer_part == 2047) {
1175            let span_end = input.raw_data_span.start;
1176            input.skip_error(ParseWarning {
1177                span: number_span_start..span_end,
1178                knuth_pltotf_offset: Some(number_span_start),
1179                kind: ParseWarningKind::DecimalNumberIsTooBig,
1180            });
1181            return if integer_part == 2047 {
1182                (FixWord::ONE, span_start..span_end)
1183            } else {
1184                (FixWord::ZERO, span_start..span_end)
1185            };
1186        }
1187
1188        let modulus = integer_part
1189            .checked_mul(FixWord::ONE.0)
1190            .unwrap()
1191            .checked_add(fractional_part)
1192            .unwrap();
1193        let result = if negative {
1194            modulus.checked_mul(-1).unwrap()
1195        } else {
1196            modulus
1197        };
1198        (FixWord(result), span_start..input.raw_data_span.start)
1199    }
1200    fn to_string(self, _: &LowerOpts) -> Option<String> {
1201        Some(format!["R {self}"])
1202    }
1203}
1204
/// Design size of the font.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum DesignSize {
    /// A design size that was accepted; the payload is its value.
    Valid(FixWord),
    /// A design size that was rejected. The parser in this file produces this for values
    /// below one; [`DesignSize::get`] reports it as 10.
    Invalid,
}
1212
1213impl Default for DesignSize {
1214    fn default() -> Self {
1215        DesignSize::Valid(FixWord::ONE * 10)
1216    }
1217}
1218
1219impl DesignSize {
1220    pub fn get(&self) -> FixWord {
1221        match self {
1222            DesignSize::Valid(v) => *v,
1223            DesignSize::Invalid => FixWord::ONE * 10,
1224        }
1225    }
1226}
1227
1228impl From<FixWord> for DesignSize {
1229    fn from(value: FixWord) -> Self {
1230        Self::Valid(value)
1231    }
1232}
1233
1234impl Parse for DesignSize {
1235    fn parse(input: &mut Input) -> (Self, Range<usize>) {
1236        let (n, r) = FixWord::parse(input);
1237        let d = if n < FixWord::ONE {
1238            input.errors.push(ParseWarning {
1239                span: r.start..input.raw_data_span.end,
1240                knuth_pltotf_offset: Some(input.raw_data_span.end),
1241                kind: ParseWarningKind::DesignSizeIsTooSmall,
1242            });
1243            DesignSize::Invalid
1244        } else {
1245            DesignSize::Valid(n)
1246        };
1247        (d, r)
1248    }
1249    fn to_string(self, opts: &LowerOpts) -> Option<String> {
1250        match self {
1251            DesignSize::Valid(n) => Parse::to_string(n, opts),
1252            DesignSize::Invalid => Some("D 10".to_string()),
1253        }
1254    }
1255}
1256
1257impl Parse for Option<FixWord> {
1258    fn parse(input: &mut Input) -> (Self, Range<usize>) {
1259        let (n, r) = FixWord::parse(input);
1260        (Some(n), r)
1261    }
1262
1263    fn to_string(self, opts: &LowerOpts) -> Option<String> {
1264        match self {
1265            None => None,
1266            Some(n) => Parse::to_string(n, opts),
1267        }
1268    }
1269}
1270
/// Reasons why `parse_u8` can fail. Each one converts into a [`ParseWarning`].
#[derive(Clone)]
enum ParseU8Error {
    /// A decimal, octal or hexadecimal number does not fit in one byte.
    SmallIntegerIsTooBig {
        /// Span of the digits of the number.
        span: std::ops::Range<usize>,
        /// Position of the cursor just after the digit that caused the overflow.
        knuth_pltotf_offset: usize,
        /// Radix the number was written in.
        radix: u8,
    },
    /// The `C` prefix was not followed by a character.
    EmptyCharacterValue {
        /// Empty span at the point where the character was expected.
        span: std::ops::Range<usize>,
    },
    /// The `F` prefix was not followed by a valid three-letter face code.
    InvalidFaceCode {
        /// Span of the characters read as the face code.
        span: std::ops::Range<usize>,
    },
    /// The value does not start with one of the prefixes `C`, `D`, `O`, `H` or `F`.
    InvalidPrefixForSmallInteger {
        /// Span of the offending prefix character, or an empty span if there was none.
        span: std::ops::Range<usize>,
    },
}
1288
1289impl From<ParseU8Error> for ParseWarning {
1290    fn from(value: ParseU8Error) -> Self {
1291        match value {
1292            ParseU8Error::SmallIntegerIsTooBig {
1293                span,
1294                knuth_pltotf_offset,
1295                radix,
1296            } => ParseWarning {
1297                span,
1298                knuth_pltotf_offset: Some(knuth_pltotf_offset),
1299                kind: ParseWarningKind::SmallIntegerIsTooBig { radix },
1300            },
1301            ParseU8Error::EmptyCharacterValue { span } => ParseWarning {
1302                span: span.clone(),
1303                knuth_pltotf_offset: Some(span.end),
1304                kind: ParseWarningKind::EmptyCharacterValue,
1305            },
1306            ParseU8Error::InvalidFaceCode { span } => ParseWarning {
1307                span: span.clone(),
1308                knuth_pltotf_offset: Some(span.end),
1309                kind: ParseWarningKind::InvalidFaceCode,
1310            },
1311            ParseU8Error::InvalidPrefixForSmallInteger { span } => ParseWarning {
1312                span: span.clone(),
1313                knuth_pltotf_offset: Some(span.end + 1),
1314                kind: ParseWarningKind::InvalidPrefixForSmallInteger,
1315            },
1316        }
1317    }
1318}
1319
1320fn parse_u8(input: &mut Input) -> Result<u8, ParseU8Error> {
1321    // PLtoTF.2014.51
1322    let parse_number = |input: &mut Input, radix: u8| {
1323        input.consume_spaces();
1324        let start_span = input.raw_data_span.start;
1325        let mut acc: u8 = 0;
1326        while let Some(c) = input.peek() {
1327            let n: u8 = match c.to_digit(radix as u32) {
1328                None => break,
1329                Some(d) => d.try_into().unwrap(),
1330            };
1331            input.next();
1332            match acc.checked_mul(radix).and_then(|l| l.checked_add(n)) {
1333                None => {
1334                    // Overflow has occurred.
1335                    // We advance to the end of the integer constant so that the span in the error is
1336                    // most accurate.
1337                    let knuth_pltotf_offset = input.raw_data_span.start;
1338                    while let Some(c) = input.peek() {
1339                        match c.to_digit(radix as u32) {
1340                            None => break,
1341                            Some(_) => input.next(),
1342                        };
1343                    }
1344                    let end_span = input.raw_data_span.start;
1345                    return Err(ParseU8Error::SmallIntegerIsTooBig {
1346                        span: start_span..end_span,
1347                        knuth_pltotf_offset,
1348                        radix,
1349                    });
1350                }
1351                Some(new_acc) => acc = new_acc,
1352            }
1353        }
1354        Ok(acc)
1355    };
1356    let u = match input.next() {
1357        // PLtoTF.2014.52
1358        Some('C' | 'c') => {
1359            input.consume_spaces();
1360            match input.next() {
1361                None => {
1362                    let span = input.last_span(None);
1363                    return Err(ParseU8Error::EmptyCharacterValue { span });
1364                }
1365                Some(c @ ' '..='~') => (c as usize).try_into().unwrap(),
1366                // if the character is not a visible ASCII char it is set to invalid=127.
1367                _ => 127,
1368            }
1369        }
1370        // PLtoTF.2014.52
1371        Some('D' | 'd') => parse_number(input, 10)?,
1372        // PLtoTF.2014.53
1373        Some('O' | 'o') => parse_number(input, 8)?,
1374        // PLtoTF.2014.54
1375        Some('H' | 'h') => parse_number(input, 16)?,
1376        // PLtoTF.2014.55
1377        Some('F' | 'f') => {
1378            input.consume_spaces();
1379            let span_start = input.raw_data_span.start;
1380            let mut acc: u8 = match input.next() {
1381                Some('M' | 'm') => 0,
1382                Some('B' | 'b') => 2,
1383                Some('L' | 'l') => 4,
1384                _ => 18,
1385            };
1386            acc += match input.next() {
1387                Some('R' | 'r') => 0,
1388                Some('I' | 'i') => 1,
1389                _ => 18,
1390            };
1391            acc += match input.next() {
1392                Some('R' | 'r') => 0,
1393                Some('C' | 'c') => 6,
1394                Some('E' | 'e') => 12,
1395                _ => 18,
1396            };
1397            if acc >= 18 {
1398                let span_end = input.raw_data_span.start;
1399                return Err(ParseU8Error::InvalidFaceCode {
1400                    span: span_start..span_end,
1401                });
1402            }
1403            acc
1404        }
1405        c => {
1406            let span = input.last_span(c);
1407            return Err(ParseU8Error::InvalidPrefixForSmallInteger { span });
1408        }
1409    };
1410    Ok(u)
1411}
1412
1413#[cfg(test)]
1414mod tests {
1415    use super::*;
1416
1417    fn run(source: &str, want: Vec<Root>, want_errs: Vec<ParseWarning>) {
1418        let (got, got_errors) = Ast::from_pl_source_code(source);
1419        assert_eq!(got_errors, want_errs);
1420        assert_eq!(got, Ast(want));
1421    }
1422
1423    macro_rules! ast_test {
1424        ( $( ($name: ident, $input: expr, $want: expr, $want_errors: expr, ), )+ ) => {
1425            $(
1426                #[test]
1427                fn $name() {
1428                    let input = $input;
1429                    let want = $want;
1430                    let want_errors = $want_errors;
1431                    run(input, want, want_errors);
1432                }
1433            )+
1434        };
1435    }
1436
1437    ast_test!(
1438        (
1439            string,
1440            r"(CODINGSCHEME MY CODING Scheme)",
1441            vec![Root::CodingScheme(SingleValue {
1442                data: "MY CODING Scheme".into(),
1443                data_span: 14..30,
1444            })],
1445            vec![],
1446        ),
1447        (
1448            boolean_true,
1449            r"(SEVENBITSAFEFLAG TRUE)",
1450            vec![Root::SevenBitSafeFlag(SingleValue {
1451                data: true,
1452                data_span: 18..19,
1453            })],
1454            vec![],
1455        ),
1456        (
1457            boolean_true_with_junk,
1458            r"(SEVENBITSAFEFLAG TRIPS)",
1459            vec![Root::SevenBitSafeFlag(SingleValue {
1460                data: true,
1461                data_span: 18..19
1462            })],
1463            vec![],
1464        ),
1465        (
1466            boolean_false,
1467            r"(SEVENBITSAFEFLAG FALSE)",
1468            vec![Root::SevenBitSafeFlag(SingleValue {
1469                data: false,
1470                data_span: 18..19
1471            })],
1472            vec![],
1473        ),
1474        (
1475            boolean_invalid,
1476            r"(SEVENBITSAFEFLAG INVALID)",
1477            vec![],
1478            vec![ParseWarning {
1479                span: 18..25,
1480                knuth_pltotf_offset: Some(19),
1481                kind: ParseWarningKind::InvalidBoolean,
1482            }],
1483        ),
1484        (
1485            one_byte_char_invalid_prefix,
1486            r"(BOUNDARYCHAR J a)",
1487            vec![Root::BoundaryChar(SingleValue {
1488                data: Char(0),
1489                data_span: 14..17
1490            })],
1491            vec![ParseWarning {
1492                span: 14..15,
1493                knuth_pltotf_offset: Some(16),
1494                kind: ParseWarningKind::InvalidPrefixForSmallInteger,
1495            }],
1496        ),
1497        (
1498            one_byte_char_no_prefix,
1499            r"(BOUNDARYCHAR)",
1500            vec![Root::BoundaryChar(SingleValue {
1501                data: Char(0),
1502                data_span: 13..13,
1503            })],
1504            vec![ParseWarning {
1505                span: 13..13,
1506                knuth_pltotf_offset: Some(14),
1507                kind: ParseWarningKind::InvalidPrefixForSmallInteger,
1508            }],
1509        ),
1510        (
1511            one_byte_char,
1512            r"(BOUNDARYCHAR C a)",
1513            vec![Root::BoundaryChar(SingleValue {
1514                data: 'a'.try_into().unwrap(),
1515                data_span: 14..17,
1516            })],
1517            vec![],
1518        ),
1519        (
1520            one_byte_missing,
1521            r"(BOUNDARYCHAR C)",
1522            vec![Root::BoundaryChar(SingleValue {
1523                data: Char(0),
1524                data_span: 14..15
1525            })],
1526            vec![ParseWarning {
1527                span: 15..15,
1528                knuth_pltotf_offset: Some(15),
1529                kind: ParseWarningKind::EmptyCharacterValue,
1530            }],
1531        ),
1532        /*
1533        TODO: re-enable when the bug is fixed
1534        (
1535            invalid_character_value_tab,
1536            "(BOUNDARYCHAR C \t)",
1537            vec![Root::BoundaryChar(SingleValue {
1538                data: Char(0), // probably Char(127)
1539                data_span: 14..16
1540            })],
1541            vec![
1542                ParseWarning::NonVisibleAsciiCharacter('\t', 16),
1543                ParseWarning::EmptyCharacterValue { span: 16..16 }
1544            ],
1545        ),
1546         */
1547        (
1548            one_byte_octal,
1549            r"(BOUNDARYCHAR O 77)",
1550            vec![Root::BoundaryChar(SingleValue {
1551                data: Char(0o77),
1552                data_span: 14..18
1553            })],
1554            vec![],
1555        ),
1556        (
1557            one_byte_octal_too_big,
1558            r"(BOUNDARYCHAR O 7777)",
1559            vec![Root::BoundaryChar(SingleValue {
1560                data: Char(0o0),
1561                data_span: 14..20
1562            })],
1563            vec![ParseWarning {
1564                span: 16..20,
1565                knuth_pltotf_offset: Some(19),
1566                kind: ParseWarningKind::SmallIntegerIsTooBig { radix: 8 }
1567            }],
1568        ),
1569        (
1570            one_byte_decimal,
1571            r"(BOUNDARYCHAR D 77)",
1572            vec![Root::BoundaryChar(SingleValue {
1573                data: Char(77),
1574                data_span: 14..18
1575            })],
1576            vec![],
1577        ),
1578        (
1579            one_byte_decimal_too_big,
1580            r"(BOUNDARYCHAR D 7777)",
1581            vec![Root::BoundaryChar(SingleValue {
1582                data: Char(0),
1583                data_span: 14..20
1584            })],
1585            vec![ParseWarning {
1586                span: 16..20,
1587                knuth_pltotf_offset: Some(19),
1588                kind: ParseWarningKind::SmallIntegerIsTooBig { radix: 10 }
1589            }],
1590        ),
1591        (
1592            one_byte_hexadecimal,
1593            r"(BOUNDARYCHAR H 17)",
1594            vec![Root::BoundaryChar(SingleValue {
1595                data: Char(0x17),
1596                data_span: 14..18
1597            })],
1598            vec![],
1599        ),
1600        (
1601            one_byte_hexadecimal_too_big,
1602            r"(BOUNDARYCHAR H 1777)",
1603            vec![Root::BoundaryChar(SingleValue {
1604                data: Char(0x0),
1605                data_span: 14..20
1606            })],
1607            vec![ParseWarning {
1608                span: 16..20,
1609                knuth_pltotf_offset: Some(19),
1610                kind: ParseWarningKind::SmallIntegerIsTooBig { radix: 16 }
1611            }],
1612        ),
1613        (
1614            one_byte_face,
1615            r"(BOUNDARYCHAR F BIC)",
1616            vec![Root::BoundaryChar(SingleValue {
1617                data: Char(9),
1618                data_span: 14..19
1619            })],
1620            vec![],
1621        ),
1622        (
1623            one_byte_face_invalid,
1624            r"(BOUNDARYCHAR F ABC)",
1625            vec![Root::BoundaryChar(SingleValue {
1626                data: Char(0),
1627                data_span: 14..19
1628            })],
1629            vec![ParseWarning {
1630                span: 16..19,
1631                knuth_pltotf_offset: Some(19),
1632                kind: ParseWarningKind::InvalidFaceCode,
1633            }],
1634        ),
1635        (
1636            one_byte_four_byte,
1637            r"(Header D19HA)",
1638            vec![Root::Header(TupleValue {
1639                left: DecimalU8(19),
1640                left_span: 8..11,
1641                right: 0xA,
1642                right_span: 11..13,
1643            })],
1644            vec![],
1645        ),
1646        (
1647            four_bytes_octal,
1648            r"(CHECKSUM O 77)",
1649            vec![Root::Checksum(SingleValue {
1650                data: 0o77,
1651                data_span: 10..14
1652            })],
1653            vec![],
1654        ),
1655        (
1656            four_bytes_hexadecimal,
1657            r"(CHECKSUM H 77)",
1658            vec![Root::Checksum(SingleValue {
1659                data: 0x77,
1660                data_span: 10..14
1661            })],
1662            vec![],
1663        ),
1664        (
1665            four_bytes_missing_prefix,
1666            r"(CHECKSUM)",
1667            vec![Root::Checksum(SingleValue {
1668                data: 0,
1669                data_span: 9..9
1670            })],
1671            vec![ParseWarning {
1672                span: 9..9,
1673                knuth_pltotf_offset: Some(9),
1674                kind: ParseWarningKind::InvalidPrefixForInteger { prefix: None },
1675            }],
1676        ),
1677        (
1678            four_bytes_invalid_prefix,
1679            r"(CHECKSUM W 77)",
1680            vec![Root::Checksum(SingleValue {
1681                data: 0,
1682                data_span: 10..11
1683            })],
1684            vec![ParseWarning {
1685                span: 10..11,
1686                knuth_pltotf_offset: Some(11),
1687                kind: ParseWarningKind::InvalidPrefixForInteger { prefix: Some('W') },
1688            }],
1689        ),
1690        (
1691            four_bytes_too_big,
1692            r"(CHECKSUM O 666666666666666666)",
1693            vec![Root::Checksum(SingleValue {
1694                data: 0o6666666666,
1695                data_span: 10..30
1696            })],
1697            vec![ParseWarning {
1698                span: 12..30,
1699                knuth_pltotf_offset: Some(23),
1700                kind: ParseWarningKind::IntegerIsTooBig { radix: 8 }
1701            }],
1702        ),
1703        ( // Case: an octal CHECKSUM containing the non-octal digit `8`.
1704            four_bytes_invalid_octal_digit,
1705            r"(CHECKSUM O 666686666666666666)",
1706            vec![Root::Checksum(SingleValue {
1707                data: 0o6666, // Only the four digits before the `8` appear in the expected value.
1708                data_span: 10..30 // Still spans the `O` prefix through the last digit of the input.
1709            })],
1710            vec![ParseWarning {
1711                span: 16..17, // The single `8` character at index 16.
1712                knuth_pltotf_offset: Some(17),
1713                kind: ParseWarningKind::InvalidOctalDigit { invalid_digit: '8' }
1714            }],
1715        ),
1716        ( // Case: a `D`-prefixed DESIGNSIZE value with no fractional part.
1717            fix_word_integer,
1718            r"(DESIGNSIZE D 1)",
1719            vec![Root::DesignSize(SingleValue {
1720                data: FixWord::ONE.into(), // The literal `1` is expected to equal `FixWord::ONE`.
1721                data_span: 12..15, // Byte range of `D 1`.
1722            })],
1723            vec![], // No parse warnings are expected.
1724        ),
1725        ( // Case: a `D`-prefixed DESIGNSIZE value with a fractional part.
1726            fix_word_decimal,
1727            r"(DESIGNSIZE D 11.5)",
1728            vec![Root::DesignSize(SingleValue {
1729                data: (FixWord::ONE * 23 / 2).into(), // 11.5 written as 23/2 of `FixWord::ONE`.
1730                data_span: 12..18, // Byte range of `D 11.5`.
1731            })],
1732            vec![], // No parse warnings are expected.
1733        ),
1734        ( // Case: a negative fix-word value, here a CHARWD nested inside a CHARACTER node.
1735            fix_word_negative,
1736            r"(CHARACTER C X (CHARWD D -11.5))",
1737            vec![Root::Character(Branch {
1738                data: Char::X, // The character selected by `C X`.
1739                data_span: 11..14, // Byte range of `C X`.
1740                children: vec![Character::Width(SingleValue {
1741                    data: Some(FixWord::ONE * -23 / 2), // -11.5 written as -23/2 of `FixWord::ONE`; the width is wrapped in `Some`.
1742                    data_span: 23..30, // Byte range of `D -11.5`, sign included.
1743                })]
1744            }),],
1745            vec![], // No parse warnings are expected.
1746        ),
1747        ( // Case: a decimal DESIGNSIZE value that is flagged as too big.
1748            fix_word_too_big,
1749            r"(DESIGNSIZE D 2047.9999999)",
1750            vec![Root::DesignSize(SingleValue {
1751                data: FixWord::ONE.into(), // The rejected number is not used; `FixWord::ONE` is expected instead.
1752                data_span: 12..26, // Byte range of the `D` prefix through the last digit.
1753            })],
1754            vec![ParseWarning {
1755                span: 14..26, // The number only, without the `D` prefix.
1756                knuth_pltotf_offset: Some(14), // Same index as the start of the number.
1757                kind: ParseWarningKind::DecimalNumberIsTooBig,
1758            }],
1759        ),
1760        ( // Case: a complete multi-node property list exercising most root node kinds at once.
1761            pl_to_tf_section_7_example, // NOTE(review): name suggests the sample from section 7 of PLtoTF - confirm.
1762            r"
1763            (FAMILY NOVA)
1764            (FACE F MIE)
1765            (CODINGSCHEME ASCII)
1766            (DESIGNSIZE D 10)
1767            (DESIGNUNITS D 18)
1768            (COMMENT A COMMENT IS IGNORED)
1769            (COMMENT (EXCEPT THIS ONE ISN'T))
1770            (COMMENT (ACTUALLY IT IS, EVEN THOUGH
1771                    IT SAYS IT ISN'T))
1772            (FONTDIMEN
1773               (SLANT R -.25)
1774               (SPACE D 6)
1775               (SHRINK D 2)
1776               (STRETCH D 3)
1777               (XHEIGHT R 10.55)
1778               (QUAD D 18)
1779               )
1780            (LIGTABLE
1781               (LABEL C f)
1782               (LIG C f O 200)
1783               (SKIP D 1)
1784               (LABEL O 200)
1785               (LIG C i O 201)
1786               (KRN O 51 R 1.5)
1787               (/LIG C ? C f)
1788               (STOP)
1789               )
1790            (CHARACTER C f
1791               (CHARWD D 6)
1792               (CHARHT R 13.5)
1793               (CHARIC R 1.5)
1794               )",
1795            vec![ // Expected roots in source order; spans are byte offsets into the raw string above, indentation included.
1796                Root::Family(SingleValue {
1797                    data: "NOVA".into(),
1798                    data_span: 21..25,
1799                }),
1800                Root::Face(SingleValue {
1801                    data: Face::Valid( // Expected decoding of the three-letter code `MIE`.
1802                        crate::FaceWeight::Medium,
1803                        crate::FaceSlope::Italic,
1804                        crate::FaceExpansion::Extended
1805                    ),
1806                    data_span: 45..50,
1807                }),
1808                Root::CodingScheme(SingleValue {
1809                    data: "ASCII".into(),
1810                    data_span: 78..83,
1811                }),
1812                Root::DesignSize(SingleValue {
1813                    data: (FixWord::ONE * 10).into(),
1814                    data_span: 109..113,
1815                }),
1816                Root::DesignUnits(SingleValue {
1817                    data: FixWord::ONE * 18,
1818                    data_span: 140..144,
1819                }),
1820                Root::Comment(" A COMMENT IS IGNORED".into()), // Comment text is kept verbatim, leading space included.
1821                Root::Comment(" (EXCEPT THIS ONE ISN'T)".into()), // Nested parentheses stay part of the comment text.
1822                Root::Comment(format![ // Multi-line comment: the newline and the 20 spaces that follow it are kept.
1823                    " (ACTUALLY IT IS, EVEN THOUGH\n{}IT SAYS IT ISN'T)",
1824                    " ".repeat(20)
1825                ]),
1826                Root::FontDimension(Branch {
1827                    data: (), // FONTDIMEN carries no data of its own.
1828                    data_span: 362..362, // Empty range, matching the unit data.
1829                    children: vec![
1830                        FontDimension::NamedParam(
1831                            NamedParameter::Slant,
1832                            SingleValue {
1833                                data: FixWord::ONE * -1 / 4, // `R -.25`: negative value with no digit before the point.
1834                                data_span: 369..375,
1835                            }
1836                        ),
1837                        FontDimension::NamedParam(
1838                            NamedParameter::Space,
1839                            SingleValue {
1840                                data: FixWord::ONE * 6,
1841                                data_span: 399..402,
1842                            }
1843                        ),
1844                        FontDimension::NamedParam(
1845                            NamedParameter::Shrink,
1846                            SingleValue {
1847                                data: FixWord::ONE * 2,
1848                                data_span: 427..430,
1849                            }
1850                        ),
1851                        FontDimension::NamedParam(
1852                            NamedParameter::Stretch,
1853                            SingleValue {
1854                                data: FixWord::ONE * 3,
1855                                data_span: 456..459,
1856                            }
1857                        ),
1858                        FontDimension::NamedParam(
1859                            NamedParameter::XHeight,
1860                            SingleValue {
1861                                data: FixWord(1055 * FixWord::ONE.0 / 100 + 1), // `R 10.55`; NOTE(review): the `+ 1` presumably reflects rounding - confirm.
1862                                data_span: 485..492,
1863                            }
1864                        ),
1865                        FontDimension::NamedParam(
1866                            NamedParameter::Quad,
1867                            SingleValue {
1868                                data: FixWord::ONE * 18,
1869                                data_span: 515..519,
1870                            }
1871                        ),
1872                    ]
1873                }),
1874                Root::LigTable(Branch {
1875                    data: (), // LIGTABLE carries no data of its own.
1876                    data_span: 575..575, // Empty range, matching the unit data.
1877                    children: vec![
1878                        LigTable::Label(SingleValue {
1879                            data: LigTableLabel::Char('f'.try_into().unwrap()),
1880                            data_span: 582..585,
1881                        }),
1882                        LigTable::Lig(
1883                            PostLigOperation::RetainNeitherMoveToInserted, // Expected operation for a plain `LIG`.
1884                            TupleValue {
1885                                left: 'f'.try_into().unwrap(),
1886                                left_span: 607..610,
1887                                right: Char(0o200),
1888                                right_span: 611..616,
1889                            }
1890                        ),
1891                        LigTable::Skip(SingleValue {
1892                            data: DecimalU8(1), // From `SKIP D 1`.
1893                            data_span: 639..642
1894                        }),
1895                        LigTable::Label(SingleValue {
1896                            data: LigTableLabel::Char(Char(0o200)), // `LABEL O 200`: label given as an octal character code.
1897                            data_span: 666..671,
1898                        }),
1899                        LigTable::Lig(
1900                            PostLigOperation::RetainNeitherMoveToInserted,
1901                            TupleValue {
1902                                left: 'i'.try_into().unwrap(),
1903                                left_span: 693..696,
1904                                right: Char(0o201),
1905                                right_span: 697..702,
1906                            }
1907                        ),
1908                        LigTable::Kern(TupleValue {
1909                            left: Char(0o51),
1910                            left_span: 724..728,
1911                            right: FixWord::ONE * 3 / 2, // `R 1.5` written as 3/2 of `FixWord::ONE`.
1912                            right_span: 729..734,
1913                        }),
1914                        LigTable::Lig(
1915                            PostLigOperation::RetainLeftMoveNowhere, // Expected operation for `/LIG`.
1916                            TupleValue {
1917                                left: '?'.try_into().unwrap(),
1918                                left_span: 757..760,
1919                                right: 'f'.try_into().unwrap(),
1920                                right_span: 761..764,
1921                            }
1922                        ),
1923                        LigTable::Stop(SingleValue {
1924                            data: (), // STOP carries no data.
1925                            data_span: 786..786, // Empty range, matching the unit data.
1926                        }),
1927                    ]
1928                }),
1929                Root::Character(Branch {
1930                    data: 'f'.try_into().unwrap(),
1931                    data_span: 828..831,
1932                    children: vec![
1933                        Character::Width(SingleValue {
1934                            data: Some(FixWord::ONE * 6), // Width is wrapped in `Some`, unlike height and italic correction below.
1935                            data_span: 855..858,
1936                        }),
1937                        Character::Height(SingleValue {
1938                            data: FixWord::ONE * 27 / 2,
1939                            data_span: 883..889,
1940                        }),
1941                        Character::ItalicCorrection(SingleValue {
1942                            data: FixWord::ONE * 3 / 2,
1943                            data_span: 914..919,
1944                        }),
1945                    ]
1946                })
1947            ],
1948            vec![], // No parse warnings are expected.
1949        ),
1950    );
1951}