summaryrefslogtreecommitdiff
path: root/src/token/tokenise.rs
blob: 743051bbb2944d9172ae1bd9b9bd13baafd0523f (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
/*
	Copyright 2023-2024 Gabriel Bjørnager Jensen.

	This file is part of eAS.

	eAS is free software: you can redistribute it
	and/or modify it under the terms of the GNU
	General Public License as published by the Free
	Software Foundation, either version 3 of the
	License, or (at your option) any later version.

	eAS is distributed in the hope that it will
	be useful, but WITHOUT ANY WARRANTY; without
	even the implied warranty of MERCHANTABILITY or
	FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	General Public License for more details.

	You should have received a copy of the GNU
	General Public License along with eAS. If not,
	see <https://www.gnu.org/licenses/>.
*/

use crate::is_valid_character;
use crate::error::Error;
use crate::source_location::SourceLocation;
use crate::token::Token;

impl Token {
	/// Splits `input` into tokens, each paired with the source location at which it starts.
	///
	/// `location` is advanced as the input is consumed, so that on return it
	/// reflects the position reached by the tokeniser.
	///
	/// # Errors
	///
	/// Propagates the first error reported by `get_next_token`; tokens collected
	/// up to that point are discarded.
	#[must_use]
	pub fn tokenise(input: &str, location: &mut SourceLocation) -> Result<Vec<(SourceLocation, Self)>, Error> {
		// Character offset into `input` of the next unread character.
		let mut cursor: usize = 0x0;

		// `transpose` turns `Ok(None)` (end of input) into the end of the
		// iterator, and collecting into a `Result` stops at the first error.
		std::iter::from_fn(|| get_next_token(input, &mut cursor, location).transpose()).collect()
	}
}

/// Reads the next token from `input`, starting at the character offset `*index`.
///
/// Spaces and tabs are skipped. A line feed and a `;` comment (which runs to
/// the end of the line) both produce a `Return` token. `[`, `]`, `.`, `,`,
/// and `#` produce their own tokens, `"` starts a string literal, and anything
/// else starts a word.
///
/// `*index` and `location` are advanced past everything that was consumed.
/// The returned location is where the token starts.
///
/// Returns `Ok(None)` once the input is exhausted.
///
/// # Errors
///
/// * `Error::IllegalCharacter` if a character inspected by this function is
///   rejected by `is_valid_character`. Only characters seen between tokens and
///   the first character of each token are checked here.
/// * `Error::UnterminatedString` if a string literal is not closed.
fn get_next_token(input: &str, index: &mut usize, location: &mut SourceLocation) -> Result<Option<(SourceLocation, Token)>, Error> {
	use Token::*;

	for c in input.chars().skip(*index) {
		if !is_valid_character(c) { return Err(Error::IllegalCharacter(c, location.clone())) };

		// Remember where the token begins before anything is consumed.
		let token_start = location.clone();

		// Whitespace and single-character delimiters are consumed right away.
		// The first character of a word is left for `complete_word` to read.
		if matches!(c, ' ' | '\t' | '\n' | '[' | ']' | '.' | ',' | '#' | ';' | '"') {
			*index += 0x1;
			location.next_column();
		}

		match c {
			| ' '
			| '\t'
			=> continue,

			'\n' => {
				location.return_carriage();
				return Ok(Some((token_start, Return)));
			},

			'[' => return Ok(Some((token_start, BracketLeft))),
			']' => return Ok(Some((token_start, BracketRight))),
			'.' => return Ok(Some((token_start, FullStop))),
			',' => return Ok(Some((token_start, Comma))),
			'#' => return Ok(Some((token_start, Hashtag))),

			';' => {
				// A comment ends the line just like a line feed does.
				skip_line(input, index, location);
				return Ok(Some((token_start, Return)));
			},

			'"' => {
				return match complete_string(input, index, location) {
					Ok(string) => Ok(Some((token_start, StringLiteral(string)))),
					_          => Err(Error::UnterminatedString(token_start)),
				};
			},

			_ => {},
		};

		// Anything else starts a word. `None` from `complete_word` means that
		// the word ran to the end of the input (and has been consumed), so it
		// is recovered from its starting offset instead of being dropped.
		// Continuing the loop here would be wrong: this iterator was created
		// from the old offset and would revisit characters already consumed.
		let word_start = *index;
		let word = complete_word(input, index, location)
			.unwrap_or_else(|| input.chars().skip(word_start).collect());

		return Ok(Some((token_start, Word(word))));
	}

	return Ok(None);
}

/// Reads one word from `input`, starting at the character offset `*index`.
///
/// Characters are accumulated until a terminator (space, tab, line feed, `.`,
/// `,`, or `;`) or the end of the input is reached. The terminator itself is
/// not consumed. `*index` and `location` are advanced once for every
/// accumulated character.
///
/// Returns the accumulated word, which is empty if the very first character
/// is a terminator. Returns `None` only if there is nothing left to read at
/// `*index`.
///
/// NOTE(review): `[`, `]`, `#`, and `"` start tokens of their own in
/// `get_next_token` but do not terminate a word here, so `r0]` is read as a
/// single word. Confirm whether that is intended.
#[must_use]
fn complete_word(input: &str, index: &mut usize, location: &mut SourceLocation) -> Option<String> {
	let mut buffer = String::new();

	for c in input.chars().skip(*index) {
		// Don't count the terminating character.
		if matches!(c, ' ' | '\t' | '\n' | '.' | ',' | ';') { return Some(buffer) };

		buffer.push(c);

		*index += 0x1;
		location.next_column();
	}

	// The input ended before a terminator was seen. A word that runs to the
	// end of the input is still a word and must not be thrown away.
	if buffer.is_empty() { None } else { Some(buffer) }
}

/// Reads the remainder of a string literal from `input`.
///
/// `*index` must be the character offset just past the opening `"`. Characters
/// are accumulated until the closing `"`, which is consumed but not included
/// in the result. `*index` and `location` are advanced past everything that
/// was consumed, including the closing quote.
///
/// # Errors
///
/// Returns `Err(())` if a line feed or the end of the input is reached before
/// the closing `"`.
fn complete_string(input: &str, index: &mut usize, location: &mut SourceLocation) -> Result<String, ()> {
	let mut buffer = String::new();

	for c in input.chars().skip(*index) {
		*index += 0x1;

		match c {
			'\n' => return Err(()),

			'"' => {
				// The closing quote occupies a column as well. Without this
				// the tokens that follow on the same line would be reported
				// one column too early.
				location.next_column();
				return Ok(buffer);
			},

			_ => {},
		};

		location.next_column();

		buffer.push(c);
	}

	return Err(());
}

/// Skips the rest of the current line of `input`.
///
/// `*index` is advanced past every character up to and including the next
/// line feed, or to the end of the input if there is none. `location` is then
/// moved on with `return_carriage`, whether or not a line feed was found.
fn skip_line(input: &str, index: &mut usize, location: &mut SourceLocation) {
	// Count the characters that belong to this line, line feed included. The
	// scan state records that the line feed has been seen, so that the
	// character after it is the first one not counted.
	let consumed = input
		.chars()
		.skip(*index)
		.scan(false, |line_ended, c| {
			if *line_ended { return None };

			*line_ended = c == '\n';
			Some(())
		})
		.count();

	*index += consumed;

	location.return_carriage();
}