1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
//! Knuth–Morris–Pratt substring search algorithm
//!
//! This module contains a time-optimal algorithm for finding a substring
//! in a string. By 'string' and 'substring' we mean a vector of elements of the same type.
//! The algorithm is due to
//! [Knuth, Morris and Pratt](https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm).
//!
//! The API for this module is based on two assumptions:
//!
//! - There may be multiple searches for the same substring in different strings.
//!
//! - The elements of the string may be generated on demand as the search progresses.
//!    That is, the full string is not necessarily known at the start.
//!
//! ## Example
//!
//! To use the algorithm, a [Matcher] is first created.
//! This factory takes ownership of the substring.
//! On initialization the matcher computes a number of internal
//! quantities which make the subsequent matching fast.
//! These quantities depend on the substring, so mutating the substring
//! after it has been passed to the matcher is statically prevented.
//!
//! To match a string, a new [Search] instance is created by calling [Matcher::start]. Elements
//! of the string are passed in one at a time to the `next` method
//! of the search.
//! If the substring has length `m` and matches the last `m` elements that
//! have been passed in, the `next` method returns `true`.
//! Otherwise it returns `false`.
//! A single search may be used to find multiple (possibly overlapping)
//! instances of the substring in the same string.
//!
//! ```
//! # use texcraft_stdext::algorithms::substringsearch::Matcher;
//! # use texcraft_stdext::collections::nevec::Nevec;
//! # use texcraft_stdext::nevec;
//!
//! let substring = nevec![2, 3, 2];
//! let matcher = Matcher::new(substring);
//! let mut search = matcher.start();
//! assert_eq![search.next(&1), false];
//! assert_eq![search.next(&2), false];
//! assert_eq![search.next(&3), false];
//! assert_eq![search.next(&2), true];
//! assert_eq![search.next(&3), false];
//! assert_eq![search.next(&2), true];
//! ```
//!
use crate::collections::nevec::Nevec;

/// Data structure used to match a specific substring in many strings.
///
/// A matcher owns the substring together with a table precomputed from it by
/// [Matcher::new]. Individual searches are started with [Matcher::start].
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Matcher<T: PartialEq> {
    /// The substring being searched for.
    substring: Nevec<T>,
    /// Knuth–Morris–Pratt prefix function of `substring`; it has one entry per
    /// element of the substring. Entry `i` is the length of the longest proper
    /// prefix of `substring[..=i]` that is also a suffix of it.
    prefix_fn: Nevec<usize>,
}

impl<T: PartialEq> Matcher<T> {
    /// Create a new matcher that searches for the provided substring.
    ///
    /// The matcher takes ownership of the substring and precomputes its
    /// Knuth–Morris–Pratt prefix function: for every index `i`, the length of
    /// the longest proper prefix of `substring[..=i]` that is also a suffix of it.
    pub fn new(substring: Nevec<T>) -> Self {
        // The table is seeded with the entry for index 0, which is always 0:
        // a single element has no non-empty proper prefix.
        let mut prefix_fn = Nevec::with_capacity(0, substring.len());
        // `k` is the prefix function value of the previous index, i.e. the
        // number of leading elements currently matched against the suffix
        // ending just before `i`.
        let mut k = 0;
        for i in 1..substring.len() {
            // On a mismatch fall back to the next shorter prefix that is also
            // a suffix, until the element at `i` extends it or nothing is left.
            while k > 0 && substring[k] != substring[i] {
                k = prefix_fn[k - 1];
            }
            if substring[k] == substring[i] {
                k += 1;
            }
            prefix_fn.push(k);
        }

        Matcher {
            substring,
            prefix_fn,
        }
    }

    /// Start a new substring search.
    ///
    /// The returned [Search] borrows this matcher and starts with no elements
    /// of the substring matched.
    pub fn start(&self) -> Search<'_, T> {
        Search {
            factory: self,
            q: 0,
        }
    }

    /// Get an immutable reference to the underlying substring.
    //
    // Obtaining a mutable reference is not supported as internal details of
    // the matcher factory rely on the substring remaining constant.
    pub fn substring(&self) -> &Nevec<T> {
        &self.substring
    }

    /// Retake ownership of the underlying substring.
    ///
    /// This consumes the matcher; the precomputed table is dropped.
    pub fn take_substring(self) -> Nevec<T> {
        self.substring
    }
}

/// Data structure used to search for a specific substring within a specific string.
///
/// A search is created by [Matcher::start] and is fed the elements of the
/// string one at a time.
pub struct Search<'a, T: PartialEq> {
    /// The matcher that holds the substring and its precomputed table.
    factory: &'a Matcher<T>,
    /// Number of leading elements of the substring that are matched by the
    /// most recently provided elements of the string.
    q: usize,
}

impl<'a, T: PartialEq> Search<'a, T> {
    /// Provide the next element of the string to the matcher.
    /// This returns true if the last `m` elements of the string match the substring, where
    /// `m` is the length of the substring.
    pub fn next(&mut self, tail: &T) -> bool {
        while self.q > 0 && &self.factory.substring[self.q] != tail {
            self.q = self.factory.prefix_fn[self.q - 1];
        }
        if &self.factory.substring[self.q] == tail {
            self.q += 1;
        }
        if self.q == self.factory.substring.len() {
            self.q = self.factory.prefix_fn[self.q - 1];
            return true;
        }
        false
    }
}