Skip to content

Commit b3e1e4f

Browse files
committed
breaking things out, refactoring, now uses proper lexing to tokens, but still one pass
1 parent fc910a2 commit b3e1e4f

File tree

7 files changed

+2445
-2173
lines changed

7 files changed

+2445
-2173
lines changed

lib/include/cpp-json/Reader.h

Lines changed: 295 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,295 @@
1+
2+
#ifndef READER_H_
3+
#define READER_H_
4+
5+
#include <cassert>
6+
#include <cstddef>
7+
#include <optional>
8+
#include <regex>
9+
#include <stack>
10+
#include <string>
11+
#include <string_view>
12+
13+
/**
 * @brief A forward cursor over a non-owning view of character data.
 *
 * The reader never copies or owns the input: it stores a
 * std::basic_string_view, so the viewed buffer must outlive the reader.
 * The position only moves forward through the read/consume/match members,
 * and can be rewound by saving it with push_state() and calling
 * restore_state().
 *
 * @tparam Ch character type of the input (e.g. char)
 */
template <class Ch>
class BasicReader {
public:
    /**
     * @brief A 1-based line/column position inside the input
     */
    struct Location {
        size_t line;
        size_t column;
    };

public:
    /**
     * @brief Construct a new Basic Reader object for lexing a string
     *
     * @param input view of the characters to read; not copied
     */
    explicit BasicReader(std::basic_string_view<Ch> input) noexcept
        : input_(input) {
    }

    BasicReader()                                  = default;
    BasicReader(const BasicReader &other)          = default;
    BasicReader &operator=(const BasicReader &rhs) = default;
    ~BasicReader()                                 = default;

public:
    /**
     * @brief Returns true if the reader is at the end of the stream
     *
     * @return bool
     */
    bool eof() const noexcept {
        // `>=` rather than `==` so that a position can never be treated as
        // readable once it is at or beyond the end of the input.
        return index_ >= input_.size();
    }

    /**
     * @brief Returns the next character in the string without advancing the position
     *
     * @return Ch the next character, or '\0' when at the end of the input
     */
    Ch peek() const noexcept {
        if (eof()) {
            return '\0';
        }

        return input_[index_];
    }

    /**
     * @brief Returns the next character in the string and advances the position
     *
     * @return Ch the character read, or '\0' (without advancing) when at the
     * end of the input
     */
    Ch read() noexcept {
        if (eof()) {
            return '\0';
        }

        return input_[index_++];
    }

    /**
     * @brief Consumes while the next character is in the input set <chars>
     * and returns the number of consumed characters
     *
     * @param chars the set of characters to skip over
     * @return size_t
     */
    size_t consume(std::basic_string_view<Ch> chars) noexcept {
        return consume_while([chars](Ch ch) {
            return chars.find(ch) != std::basic_string_view<Ch>::npos;
        });
    }

    /**
     * @brief Consumes while the next character is whitespace (tab or space)
     * and returns the number of consumed characters
     *
     * @return size_t
     */
    size_t consume_whitespace() noexcept {
        return consume_while([](Ch ch) {
            return (ch == ' ' || ch == '\t');
        });
    }

    /**
     * @brief Consumes while a given predicate function returns true
     * and returns the number of consumed characters
     *
     * @param pred callable taking a Ch and returning something convertible
     * to bool; because this function is noexcept, it must not throw
     * @return size_t
     */
    template <class Pred>
    size_t consume_while(Pred pred) noexcept {
        size_t count = 0;
        while (!eof() && pred(input_[index_])) {
            ++index_;
            ++count;
        }
        return count;
    }

    /**
     * @brief Returns true and advances the position
     * if the next character matches <ch>
     *
     * Never matches at the end of the input, even when <ch> is '\0'
     * (the value peek() reports there), so the position can never move
     * past the end.
     *
     * @param ch the character to match
     * @return bool
     */
    bool match(Ch ch) noexcept {
        if (eof() || input_[index_] != ch) {
            return false;
        }

        ++index_;
        return true;
    }

    /**
     * @brief Returns true and advances the position
     * if the next sequences of characters matches <s>
     *
     * @param s the exact character sequence to match
     * @return bool
     */
    bool match(std::basic_string_view<Ch> s) noexcept {
        // index_ never exceeds input_.size(), so compare() cannot throw here;
        // it clamps the compared range when fewer than s.size() characters remain.
        if (input_.compare(index_, s.size(), s) != 0) {
            return false;
        }

        index_ += s.size();
        return true;
    }

    /**
     * @brief Matches until the end of the input and returns the string matched
     *
     * @return std::optional<std::basic_string<Ch>> the remaining input, or an
     * empty optional when already at the end
     */
    std::optional<std::basic_string<Ch>> match_any() {
        if (eof()) {
            return {};
        }

        // Direct-initialization is required: the basic_string constructor
        // taking a string_view is explicit.
        std::basic_string<Ch> m(input_.substr(index_));
        index_ += m.size();
        return m;
    }

    /**
     * @brief Returns the matching string and advances the position
     * if the next sequences of characters matches <regex>
     *
     * The match is anchored at the current position (match_continuous).
     *
     * @param regex the pattern to match at the current position
     * @return std::optional<std::basic_string<Ch>> the matched text, or an
     * empty optional when the pattern does not match here
     */
    std::optional<std::basic_string<Ch>> match(const std::basic_regex<Ch> &regex) {
        std::match_results<const Ch *> matches;

        // Pointer arithmetic on data() instead of &input_[i]: indexing a
        // string_view at size() is undefined behaviour.
        const Ch *const first = input_.data() + index_;
        const Ch *const last  = input_.data() + input_.size();

        if (std::regex_search(first, last, matches, regex, std::regex_constants::match_continuous)) {
            std::basic_string<Ch> m(matches[0].first, matches[0].second);
            index_ += m.size();
            return m;
        }

        return {};
    }

    /**
     * @brief Returns the matching string and advances the position
     * for each character satisfying the given predicate
     *
     * @param pred callable taking a Ch and returning something convertible to bool
     * @return std::optional<std::basic_string<Ch>> the matched text, or an
     * empty optional when no character satisfied the predicate
     */
    template <class Pred>
    std::optional<std::basic_string<Ch>> match_while(Pred pred) {

        const size_t start = index_;
        while (!eof() && pred(input_[index_])) {
            ++index_;
        }

        if (index_ == start) {
            return {};
        }

        return std::basic_string<Ch>(input_.substr(start, index_ - start));
    }

    /**
     * @brief Returns the current position in the string
     *
     * @return size_t
     */
    size_t index() const noexcept {
        return index_;
    }

    /**
     * @brief Returns the current position in the string as a line/column pair
     *
     * @return Location
     */
    Location location() const noexcept {
        return location(index_);
    }

    /**
     * @brief Returns the position of <index> in the string as line/column pair
     *
     * Lines and columns are 1-based and lines are separated by '\n'.
     * An index at or beyond the end of the input is reported as the
     * position just past the last character, so errors raised at the end
     * of the input point at the right place.
     *
     * @param index offset into the input
     * @return Location
     */
    Location location(size_t index) const noexcept {
        size_t line = 1;
        size_t col  = 1;

        const size_t end = (index < input_.size()) ? index : input_.size();

        for (size_t i = 0; i < end; ++i) {
            if (input_[i] == '\n') {
                ++line;
                col = 1;
            } else {
                ++col;
            }
        }

        return Location{line, col};
    }

    /**
     * @brief Stores the current state of the reader onto the stack
     *
     */
    void push_state() {
        state_.push(index_);
    }

    /**
     * @brief Removes the most recently pushed state from the stack
     * without changing the current position
     *
     */
    void pop_state() {
        assert(!state_.empty());
        state_.pop();
    }

    /**
     * @brief Sets the current state to the most recently pushed state from the stack, and then
     * removes the most recently pushed state from the stack
     *
     */
    void restore_state() {
        assert(!state_.empty());
        index_ = state_.top();
        state_.pop();
    }

private:
    std::basic_string_view<Ch> input_;     // non-owning view of the text being read
    size_t                     index_ = 0; // offset of the next unread character
    std::stack<size_t>         state_;     // positions saved by push_state()
};

using Reader = BasicReader<char>;
294+
295+
#endif

0 commit comments

Comments
 (0)