use super::{Range, LexError, Scanner, Location};
use crate::lexeme::{Lexeme, Lexeme::*, RId::Module};
use crate::utils::Result3::*;
use std::fmt::{Display, Formatter};
use crate::scanner::layout::AugmentedLexeme::{PhantomCloseCurlyBracket, PhantomSemicolon, PhantomOpenCurlyBracket, Real};
use crate::utils::iter::IterStream;
use std::collections::VecDeque;
/// Iterator that pulls bare [`Lexeme`]s out of a [`Scanner`].
///
/// Once the scanner reports a fatal error the error is stored in `error` and
/// the iterator yields `None` from then on.
pub struct RawLexemeIterator<I>
where
    I: std::io::Read,
{
    /// The scanner the lexemes are read from.
    scanner: Scanner<I>,
    /// The fatal lexing error that stopped iteration, if one occurred.
    error: Option<LexError>,
}
impl<I> Iterator for RawLexemeIterator<I>
where
    I: std::io::Read,
{
    type Item = Lexeme;

    /// Yields the next lexeme, or `None` when `enriched_next` yields nothing.
    fn next(&mut self) -> Option<Self::Item> {
        // No extra per-lexeme data is needed here, so the probe returns `()`.
        let (lexeme, ()) = self.enriched_next(|_scanner| ())?;
        Some(lexeme)
    }
}
impl<I: std::io::Read> From<Scanner<I>> for RawLexemeIterator<I> {
fn from(scanner: Scanner<I>) -> Self {
Self {
error: None,
scanner,
}
}
}
impl<I: std::io::Read> RawLexemeIterator<I> {
pub fn new(input: I) -> Self { Self::from(Scanner::new(input)) }
pub fn into_scanner(self) -> (Option<LexError>, Scanner<I>) { (self.error, self.scanner) }
fn enriched_next<T>(&mut self, proc: impl FnOnce(&Scanner<I>) -> T) -> Option<(Lexeme, T)> {
if self.error.is_some() { return None; }
let _ = self.scanner.whitespace();
let val = proc(&mut self.scanner);
match self.scanner.next_lexeme() {
Success(x) => Some((x, val)),
RetryLater(_) => None,
FailFast(err) => {
self.error = Some(err);
None
}
}
}
}
/// Iterator that yields every lexeme together with the [`Range`] it spans.
pub struct FatLexemeIterator<I>
where
    I: std::io::Read,
{
    /// The underlying lexeme source.
    iterator: RawLexemeIterator<I>,
    /// Start location of the most recently yielded lexeme; initialised to the
    /// scanner's location at construction time.
    location: Location,
}
impl<I> Iterator for FatLexemeIterator<I>
where
    I: std::io::Read,
{
    type Item = (Lexeme, Range);

    /// Yields the next lexeme with its source range.
    ///
    /// The range begins at the scanner location sampled after whitespace was
    /// skipped and ends at the scanner location after the lexeme was read.
    fn next(&mut self) -> Option<Self::Item> {
        let (lexeme, begin) = self.iterator.enriched_next(|scanner| scanner.location)?;
        self.location = begin;
        let end = self.iterator.scanner.location;
        Some((lexeme, Range { begin, end }))
    }
}
impl<I: std::io::Read> From<RawLexemeIterator<I>> for FatLexemeIterator<I> {
fn from(iterator: RawLexemeIterator<I>) -> Self {
Self {
location: iterator.scanner.location,
iterator,
}
}
}
impl<I: std::io::Read> FatLexemeIterator<I> {
    /// Creates an iterator that lexes `input` and tracks lexeme ranges.
    pub fn new(input: I) -> Self {
        RawLexemeIterator::new(input).into()
    }

    /// Gives back the recorded lexing error (if any) and the scanner.
    pub fn into_scanner(self) -> (Option<LexError>, Scanner<I>) {
        let raw = self.iterator;
        raw.into_scanner()
    }
}
/// What `EnrichedLexemeIterator` emitted most recently; decides whether a
/// layout marker is inserted before the next lexeme.
enum LastLexeme {
    /// The last lexeme was one of the reserved words `let`, `where`, `do`, `of`.
    LetWhereDoOf,
    /// Nothing has been emitted yet (initial state).
    StartOfFile,
    /// A `CurlyN` marker was just emitted; the next lexeme is passed through
    /// without any further marker.
    PassThrough,
    /// The last lexeme was any other lexeme.
    Other,
}
/// A lexeme stream element enriched with layout markers.
#[derive(Debug, Eq, PartialEq)]
pub enum EnrichedLexeme {
    /// Block-opening marker carrying a column number; displayed as `{n}`.
    CurlyN(usize),
    /// Line-start marker carrying a column number; displayed as `<n>`.
    AngleN(usize),
    /// An ordinary lexeme together with its source range.
    Normal(Lexeme, Range),
}
impl Display for EnrichedLexeme {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
use EnrichedLexeme::*;
match self {
CurlyN(n) => write!(f, "{{{}}}", n),
AngleN(n) => write!(f, "<{}>", n),
Normal(lexeme, range) => write!(f, "{}: {}", range, lexeme)
}
}
}
impl From<(Lexeme, Range)> for EnrichedLexeme {
fn from((lexeme, range): (Lexeme, Range)) -> Self {
EnrichedLexeme::Normal(lexeme, range)
}
}
/// Iterator that interleaves `CurlyN` / `AngleN` layout markers with the
/// lexemes of a [`FatLexemeIterator`].
pub struct EnrichedLexemeIterator<I>
where
    I: std::io::Read,
{
    /// Lexeme source wrapped in an `IterStream` so the next lexeme can be peeked.
    iterator: IterStream<FatLexemeIterator<I>>,
    /// What was emitted last; drives marker insertion.
    last_lexeme: LastLexeme,
    /// End line of the last emitted lexeme, or the line for which an `AngleN`
    /// marker was emitted last; starts at 0.
    last_line: usize,
}
impl<I: std::io::Read> EnrichedLexemeIterator<I> {
    /// Creates an enriched iterator that lexes `input`.
    pub fn new(input: I) -> Self {
        FatLexemeIterator::new(input).into()
    }

    /// Unwraps the stream down to the recorded lexing error (if any) and the scanner.
    pub fn into_scanner(self) -> (Option<LexError>, Scanner<I>) {
        let fat = self.iterator.unwrap();
        fat.into_scanner()
    }
}
impl<I: std::io::Read> From<FatLexemeIterator<I>> for EnrichedLexemeIterator<I> {
fn from(iterator: FatLexemeIterator<I>) -> Self {
Self {
iterator: IterStream::from(iterator),
last_lexeme: LastLexeme::StartOfFile,
last_line: 0,
}
}
}
impl<I: std::io::Read> Iterator for EnrichedLexemeIterator<I> {
type Item = EnrichedLexeme;
fn next(&mut self) -> Option<Self::Item> {
use LastLexeme::*;
use EnrichedLexeme::*;
let next = self.iterator.peek(0);
match self.last_lexeme {
LetWhereDoOf if next.is_none() || next.unwrap().0 != OpenCurlyBracket => {
self.last_lexeme = PassThrough;
let n = next.map_or(0, |t| t.1.begin.column);
Some(CurlyN(n))
}
StartOfFile if next.is_some()
&& ![OpenCurlyBracket, ReservedId(Module)]
.contains(&next.unwrap().0) => {
self.last_lexeme = PassThrough;
let n = next.unwrap().1.begin.column;
Some(CurlyN(n))
}
Other if next.is_some() && next.unwrap().1.begin.line > self.last_line => {
self.last_line = next.unwrap().1.begin.line;
let n = next.unwrap().1.begin.column;
Some(AngleN(n))
}
_ => {
let (lexeme, range) = self.iterator.next()?;
self.last_line = range.end.line;
use crate::lexeme::Lexeme::ReservedId as R;
use crate::lexeme::RId::*;
self.last_lexeme = match lexeme {
R(Let) | R(Where) | R(Do) | R(Of) => LetWhereDoOf,
_ => Other,
};
Some(Normal(lexeme, range))
}
}
}
}
/// A lexeme of the final token stream: either a lexeme read from the source or
/// a "phantom" brace/semicolon inserted by `AugmentedLexemeIterator`.
///
/// Derives the same comparison/debug traits as `EnrichedLexeme`, which holds
/// the same `Lexeme` and `Range` payload types, so values can be used with
/// `assert_eq!` and `{:?}`.
#[derive(Debug, Eq, PartialEq)]
pub enum AugmentedLexeme {
    /// A lexeme that appears in the source, with its range.
    Real(Lexeme, Range),
    /// An inserted `{`.
    PhantomOpenCurlyBracket,
    /// An inserted `}`.
    PhantomCloseCurlyBracket,
    /// An inserted `;`.
    PhantomSemicolon,
}
impl Display for AugmentedLexeme {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
Real(t, range) => write!(f, "{}: {}", range, t),
PhantomOpenCurlyBracket => write!(f, "<phantom>: {{"),
PhantomCloseCurlyBracket => write!(f, "<phantom>: }}"),
PhantomSemicolon => write!(f, "<phantom>: ;"),
}
}
}
/// Iterator that turns layout markers into phantom braces and semicolons.
pub struct AugmentedLexemeIterator<I>
where
    I: std::io::Read,
{
    /// Enriched lexeme source; wrapped in an `IterStream` so markers can be put back.
    iterator: IterStream<EnrichedLexemeIterator<I>>,
    /// Stack of open blocks: the column of each implicit block, or 0 for a
    /// block opened by an explicit `{`.
    indents: Vec<usize>,
    /// Output lexemes that have been produced but not yet yielded.
    buffer: VecDeque<AugmentedLexeme>,
}
impl<I: std::io::Read> AugmentedLexemeIterator<I> {
    /// Creates the full lexing pipeline over `input`.
    pub fn new(input: I) -> Self {
        Self::from(EnrichedLexemeIterator::new(input))
    }

    /// Unwraps the pipeline down to the recorded lexing error (if any) and the scanner.
    pub fn into_scanner(self) -> (Option<LexError>, Scanner<I>) {
        self.iterator.unwrap().into_scanner()
    }

    /// Consumes one element of the enriched stream and appends the resulting
    /// output lexeme(s) to `buffer`, updating the `indents` stack.
    ///
    /// `indents` holds the column of every open implicit block and `0` for
    /// every block opened by an explicit `{`.
    ///
    /// # Panics
    ///
    /// Panics with "mismatched curly brackets." when the input ends while an
    /// explicit `{` is still open, or when an explicit `}` is met while the
    /// innermost open block is an implicit one.
    fn prepare_next(&mut self) {
        use EnrichedLexeme::*;
        let token = match self.iterator.next() {
            Some(token) => token,
            None => {
                // End of input: close one pending implicit block per call.
                if let Some(k) = self.indents.pop() {
                    if k == 0 {
                        panic!("mismatched curly brackets.")
                    }
                    self.buffer.push_back(PhantomCloseCurlyBracket)
                }
                return;
            }
        };
        match (token, self.indents.last().copied()) {
            // Same column as the enclosing block: a new item starts.
            (AngleN(n), Some(m)) if m == n => self.buffer.push_back(PhantomSemicolon),
            // Dedent: close the innermost block and re-examine the marker
            // against the next enclosing block.
            (AngleN(n), Some(m)) if n < m => {
                self.iterator.put_back(AngleN(n));
                self.indents.pop();
                self.buffer.push_back(PhantomCloseCurlyBracket)
            }
            // Deeper indentation or no open block: the marker is dropped.
            (AngleN(_), _) => self.prepare_next(),
            // A new implicit block must be indented deeper than the enclosing
            // one. With no enclosing block it must have a column greater than
            // 0: `{0}` is what is emitted when the input ends right after
            // `let`/`where`/`do`/`of`, and pushing 0 would be mistaken for an
            // explicit `{` and panic at end of input.
            (CurlyN(n), m) if m.map_or(n > 0, |m| n > m) => {
                self.indents.push(n);
                self.buffer.push_back(PhantomOpenCurlyBracket)
            }
            // Otherwise the block is empty: open and close it at once, then
            // treat the position as an ordinary line start.
            (CurlyN(n), _) => {
                self.buffer.push_back(PhantomOpenCurlyBracket);
                self.buffer.push_back(PhantomCloseCurlyBracket);
                self.iterator.put_back(AngleN(n))
            }
            // An explicit `}` may only close a block opened by an explicit `{`.
            (Normal(CloseCurlyBracket, loc), Some(k)) => {
                assert_eq!(k, 0, "mismatched curly brackets.");
                self.indents.pop();
                self.buffer.push_back(Real(CloseCurlyBracket, loc))
            }
            (Normal(OpenCurlyBracket, loc), _) => {
                self.indents.push(0);
                self.buffer.push_back(Real(OpenCurlyBracket, loc))
            }
            // Everything else is passed through. This includes an explicit `}`
            // met while no block is open at all, which is not rejected here.
            (Normal(t, loc), _) => self.buffer.push_back(Real(t, loc)),
        }
    }
}
impl<I> From<EnrichedLexemeIterator<I>> for AugmentedLexemeIterator<I>
where
    I: std::io::Read,
{
    /// Wraps an enriched iterator; starts with no open blocks and no pending output.
    fn from(iterator: EnrichedLexemeIterator<I>) -> Self {
        let iterator = IterStream::from(iterator);
        Self {
            iterator,
            indents: Vec::new(),
            buffer: VecDeque::new(),
        }
    }
}
impl<I: std::io::Read> Iterator for AugmentedLexemeIterator<I> {
    type Item = AugmentedLexeme;

    /// Yields the next output lexeme, real or phantom.
    ///
    /// The input is only advanced when no output is pending. One input step
    /// can queue more than one output lexeme, and refilling on every call
    /// would read the scanner ahead of what has actually been yielded.
    fn next(&mut self) -> Option<AugmentedLexeme> {
        if self.buffer.is_empty() {
            self.prepare_next();
        }
        self.buffer.pop_front()
    }
}
// Unit tests for the raw and enriched lexeme iterators, driven by one shared
// sample program.
#[cfg(test)]
mod tests {
use indoc::indoc;
use super::RawLexemeIterator;
use super::EnrichedLexemeIterator;
use crate::lexeme::Lexeme::*;
use crate::lexeme::RId::*;
use crate::lexeme::ROp::*;
// Sample program lexed by both tests below.
// NOTE(review): the positions expected in `test_enriched_iterator` (e.g. `{5}`
// and `5:5` for `name`) imply that the three lines after `main = do` are
// indented by four spaces inside this literal — confirm the literal's
// indentation is intact.
const TEST_SOURCE: &str = indoc! {r#"
module Main where
import Prelude hiding (Integer)
main :: IO ()
main = do
name <- getLine
putStrLn ("Hello, " <> name <> "!")
pure ()
"#};
// Checks that `RawLexemeIterator` yields exactly the listed lexeme sequence for
// `TEST_SOURCE` and that no lexing error was recorded afterwards.
#[test]
fn test_raw_iterator() {
let mut it = RawLexemeIterator::new(TEST_SOURCE.as_bytes());
assert!(it.by_ref().eq([
ReservedId(Module),
Identifier("Main".to_string()),
ReservedId(Where),
ReservedId(Import),
Identifier("Prelude".to_string()),
Identifier("hiding".to_string()),
OpenParenthesis,
Identifier("Integer".to_string()),
CloseParenthesis,
Identifier("main".to_string()),
ReservedOp(ColonColon),
Identifier("IO".to_string()),
OpenParenthesis,
CloseParenthesis,
Identifier("main".to_string()),
ReservedOp(EqualSign),
ReservedId(Do),
Identifier("name".to_string()),
ReservedOp(LeftArrow),
Identifier("getLine".to_string()),
Identifier("putStrLn".to_string()),
OpenParenthesis,
StringLiteral("Hello, ".to_string()),
Operator("<>".to_string()),
Identifier("name".to_string()),
Operator("<>".to_string()),
StringLiteral("!".to_string()),
CloseParenthesis,
Identifier("pure".to_string()),
OpenParenthesis,
CloseParenthesis,
].iter().cloned()));
// `by_ref` above keeps `it` usable so the recorded error can be inspected.
let (err, _) = it.into_scanner();
assert_eq!(err, None);
}
// Renders every element produced by `EnrichedLexemeIterator` for `TEST_SOURCE`
// on its own line (via `Display`) and compares the text with the snapshot
// below; then checks that no lexing error was recorded.
#[test]
fn test_enriched_iterator() {
use expect_test::expect;
let mut it = EnrichedLexemeIterator::new(TEST_SOURCE.as_bytes());
let mut res = String::new();
for t in it.by_ref() { res += &format!("{}\n", t) }
expect![[r#"
1:1-1:7: module
1:8-1:12: Main
1:13-1:18: where
{1}
2:1-2:7: import
2:8-2:15: Prelude
2:16-2:22: hiding
2:23-2:24: (
2:24-2:31: Integer
2:31-2:32: )
<1>
3:1-3:5: main
3:6-3:8: ::
3:9-3:11: IO
3:12-3:13: (
3:13-3:14: )
<1>
4:1-4:5: main
4:6-4:7: =
4:8-4:10: do
{5}
5:5-5:9: name
5:10-5:12: <-
5:13-5:20: getLine
<5>
6:5-6:13: putStrLn
6:14-6:15: (
6:15-6:24: "Hello, "
6:25-6:27: <>
6:28-6:32: name
6:33-6:35: <>
6:36-6:39: "!"
6:39-6:40: )
<5>
7:5-7:9: pure
7:10-7:11: (
7:11-7:12: )
"#]].assert_eq(&res);
let (err, _) = it.into_scanner();
assert_eq!(err, None);
}
}