Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 22 additions & 4 deletions bin/main.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,26 @@
#include <iostream>

#include "lib/ui/ui_functions.hpp"
#include "lib/lexer/Lexer.hpp"

int main(int32_t argc, char** argv) {
std::vector<std::string> args = std::vector<std::string>(argv, argv + argc);
return StartConsoleUI(args, std::cout);
int main() {
const std::string sample = R"ovum(
// demo
fun Main(args: StringArray): Int {
val count: Int = args.Length()
sys::Print("Args count: " + count.ToString())
return 0
}
)ovum";

Lexer lx(sample, false);
try {
auto toks = lx.tokenize();
for (auto &t : toks) {
std::cout << t->to_string() << "\n";
}
} catch (const std::exception &e) {
std::cerr << "Lexer error: " << e.what() << "\n";
return 1;
}
return 0;
}
131 changes: 131 additions & 0 deletions lib/lexer/Lexer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
#include "Lexer.hpp"
#include "utils.hpp"

Lexer::Lexer(std::string_view src, bool keep_comments) :
src_(src), keep_comments_(keep_comments), start_(0), current_(0), line_(1), col_(1), token_col_(1) {
register_defaults();
}

// True once every character of the source has been consumed.
bool Lexer::is_at_end() const noexcept {
  const bool has_more = current_ < src_.size();
  return !has_more;
}

// Looks at the character `offset` positions past the read cursor without
// consuming anything. Returns '\0' when that position is beyond the source.
char Lexer::peek(size_t offset) const noexcept {
  const size_t position = current_ + offset;
  return position < src_.size() ? src_[position] : '\0';
}

// Returns the most recently consumed character, or '\0' if nothing has been
// consumed yet.
char Lexer::current_char() const noexcept {
  return current_ != 0 ? src_[current_ - 1] : '\0';
}

// Consumes and returns the next character, keeping the line/column counters in
// step. Returns '\0' (and consumes nothing) at end of input.
char Lexer::advance() {
  if (is_at_end())
    return '\0';

  const char consumed = src_[current_];
  ++current_;

  if (consumed != '\n') {
    ++col_;
    return consumed;
  }

  // A newline moves the position to the first column of the next line.
  ++line_;
  col_ = 1;
  return consumed;
}

void Lexer::retreat_one() {
if (current_ == 0)
return;
--current_;
int l = 1;
for (size_t i = 0; i < current_; ++i)
if (src_[i] == '\n')
++l;
line_ = l;
int col = 1;
for (size_t i = current_; i > 0; --i) {
if (src_[i - 1] == '\n') {
col = static_cast<int>(current_ - i + 1);
break;
}
if (i == 1)
col = static_cast<int>(current_);
}
col_ = col;
}

// Appends characters to `out`, consuming them, for as long as input remains
// and `pred` accepts the next unread character.
void Lexer::consume_while(std::string &out, const std::function<bool(char)> &pred) {
  for (;;) {
    if (is_at_end() || !pred(peek()))
      break;
    out.push_back(advance());
  }
}

// Returns a copy of the source text between the start of the current token
// and the read cursor; empty if the cursor is somehow before the token start.
std::string Lexer::raw_lexeme() const {
  if (current_ < start_)
    return {};
  const size_t length = current_ - start_;
  return std::string(src_.substr(start_, length));
}

// True if `s` is a member of the set returned by keyword_set().
bool Lexer::is_keyword(std::string_view s) const {
  const std::string key(s);
  return keyword_set().contains(key);
}

// True if `s` is a member of the set returned by multi_ops_set().
bool Lexer::is_multiop(std::string_view s) const {
  const std::string key(s);
  return multi_ops_set().contains(key);
}

std::vector<TokenPtr> Lexer::tokenize() {
std::vector<TokenPtr> tokens;
while (!is_at_end()) {
start_ = current_;
token_col_ = col_;
char c = advance();
Handler *h = handlers_[static_cast<unsigned char>(c)].get();
if (!h)
h = default_handler_.get();
OptToken maybe = h->scan(*this);
if (maybe && *maybe)
tokens.push_back(std::move(*maybe));
}
tokens.push_back(TokenFactory::make_eof(line_, col_));
return tokens;
}

// Resets the dispatch table and installs the built-in handlers.
//
// Every character is registered exactly once. The previous version registered
// '/', '.' and ':' twice; because set_handler() replaces the existing entry,
// the later registration silently won and the dedicated SlashHandler for '/'
// was never dispatched. The table below is non-overlapping:
//   '/' -> SlashHandler    (fix: was clobbered by OperatorHandler)
//   '.' -> OperatorHandler (unchanged effective behaviour)
//   ':' -> PunctHandler    (unchanged effective behaviour)
void Lexer::register_defaults() {
  for (auto &slot : handlers_)
    slot.reset();
  default_handler_.reset();

  // Whitespace and line breaks.
  set_handler(' ', std::make_unique<WhitespaceHandler>());
  set_handler('\t', std::make_unique<WhitespaceHandler>());
  set_handler('\r', std::make_unique<WhitespaceHandler>());
  set_handler('\n', std::make_unique<NewlineHandler>());

  // Identifier starts: ASCII letters and underscore.
  for (unsigned char c = 'a'; c <= 'z'; ++c)
    set_handler(c, std::make_unique<IdentifierHandler>());
  for (unsigned char c = 'A'; c <= 'Z'; ++c)
    set_handler(c, std::make_unique<IdentifierHandler>());
  set_handler(static_cast<unsigned char>('_'), std::make_unique<IdentifierHandler>());

  // Numeric literals start with a digit.
  // NOTE(review): the original also registered NumberHandler for '.', but that
  // entry was always overwritten by OperatorHandler below. '.' stays with
  // OperatorHandler here; confirm whether leading-dot numbers are intended.
  for (unsigned char d = '0'; d <= '9'; ++d)
    set_handler(d, std::make_unique<NumberHandler>());

  set_handler(static_cast<unsigned char>('"'), std::make_unique<StringHandler>());
  set_handler(static_cast<unsigned char>('\''), std::make_unique<CharHandler>());

  // NOTE(review): SlashHandler is assumed to handle everything that starts
  // with '/', including the plain division operator — confirm against
  // SlashHandler::scan.
  set_handler(static_cast<unsigned char>('/'), std::make_unique<SlashHandler>());

  // Generic operator starts. '/' is handled above and ':' is punctuation.
  const std::string opchars = "+-*%<>=!&|^~?.";
  for (unsigned char c : opchars)
    set_handler(c, std::make_unique<OperatorHandler>());

  const std::string puncts = ",;:(){}[]";
  for (unsigned char c : puncts)
    set_handler(c, std::make_unique<PunctHandler>());

  // Anything not registered above falls through to the default handler.
  set_default_handler(std::make_unique<DefaultHandler>());
}
75 changes: 75 additions & 0 deletions lib/lexer/Lexer.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#ifndef LEXER_HPP_
#define LEXER_HPP_

#include <array>
#include <cctype>
#include <functional>
#include <memory>
#include <stdexcept>
#include <string>
#include <string_view>
#include <unordered_set>
#include <utility>
#include <vector>

#include "handlers/CharHandler.hpp"
#include "handlers/DefaultHandler.hpp"
#include "handlers/Handler.hpp"
#include "handlers/IdentifierHandler.hpp"
#include "handlers/NewlineHandler.hpp"
#include "handlers/NumberHandler.hpp"
#include "handlers/OperatorHandler.hpp"
#include "handlers/PunctHandler.hpp"
#include "handlers/SlashHandler.hpp"
#include "handlers/StringHandler.hpp"
#include "handlers/WhitespaceHandler.hpp"

#include "tokens/TokenFactory.hpp"

// Character-dispatch lexer. tokenize() consumes the first character of each
// token and hands control to the Handler registered for that byte (or to the
// default handler when none is registered); handlers drive the cursor through
// the public scanning primitives below.
class Lexer {
public:
// Builds a lexer over `src` and installs the default handlers.
// `src` is kept as a std::string_view, so the caller must keep the underlying
// buffer alive for as long as the lexer is used.
explicit Lexer(std::string_view src, bool keep_comments = false);

// Scans the remaining input and returns the tokens, terminated by an EOF token.
std::vector<TokenPtr> tokenize();

// --- Scanning primitives used by handlers ---

// True when the cursor has reached the end of the source.
bool is_at_end() const noexcept;
// Character `offset` positions past the cursor, or '\0' if out of range.
char peek(size_t offset = 0) const noexcept;
// Most recently consumed character, or '\0' if nothing was consumed yet.
char current_char() const noexcept;
// Consumes and returns the next character ('\0' at end), updating line/column.
char advance();
// Steps the cursor back by one character and recomputes line/column.
void retreat_one();
// Consumes characters into `out` while `pred` accepts the next character.
void consume_while(std::string &out, const std::function<bool(char)> &pred);
// Copy of the source text from the current token's start up to the cursor.
std::string raw_lexeme() const;

// Line of the cursor (counting starts at 1).
int line() const noexcept {
return line_;
}
// Column recorded by tokenize() before the current token's first character.
int token_col() const noexcept {
return token_col_;
}
// Value of the keep_comments flag passed to the constructor.
bool keep_comments() const noexcept {
return keep_comments_;
}
bool is_keyword(std::string_view s) const;
bool is_multiop(std::string_view s) const;

// Installs `handler` for byte `c`, replacing any handler already registered.
void set_handler(unsigned char c, std::unique_ptr<Handler> handler) {
handlers_[c] = std::move(handler);
}
// Installs the handler used for bytes that have no dedicated handler.
void set_default_handler(std::unique_ptr<Handler> handler) {
default_handler_ = std::move(handler);
}

private:
void register_defaults();

// Non-owning view of the text being lexed.
std::string_view src_;
bool keep_comments_;

// Index where the current token starts.
size_t start_{0};
// Index of the next unread character.
size_t current_{0};
int line_{1};
int col_{1};
int token_col_{1};

// One slot per possible byte value; an empty slot means "use default_handler_".
std::array<std::unique_ptr<Handler>, 256> handlers_;
std::unique_ptr<Handler> default_handler_;
Comment on lines +4 to +72

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P0 Badge Include for handlers_ member

The header declares std::array<std::unique_ptr<Handler>, 256> handlers_; but the file only includes <vector> and other headers, not <array>. Any TU including this header will fail to compile because std::array is undefined. Add #include <array> above the class definition.

Useful? React with 👍 / 👎.

};

#endif // LEXER_HPP_
11 changes: 11 additions & 0 deletions lib/lexer/LexerError.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#ifndef LEXERERROR_HPP_
#define LEXERERROR_HPP_

#include <stdexcept>

// Exception type thrown by the lexer and its handlers. Carries only a message
// and is catchable as std::runtime_error / std::exception.
class LexerError : public std::runtime_error {
public:
  explicit LexerError(const std::string &message) : std::runtime_error(message) {}
  explicit LexerError(const char *message) : std::runtime_error(message) {}
};

#endif // LEXERERROR_HPP_
40 changes: 40 additions & 0 deletions lib/lexer/handlers/CharHandler.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#include "CharHandler.hpp"

// Scans a character literal. The opening quote has already been consumed by
// the lexer; this reads one character (or one backslash escape) followed by
// the closing quote and returns a char-literal token holding both the raw
// text and the decoded value.
// Throws LexerError if the closing quote is missing.
OptToken CharHandler::scan(Lexer &lx) {
  std::string lexeme(1, '\'');
  char value = '\0';

  if (lx.peek() != '\\') {
    // Plain character: the value is the character itself.
    value = lx.advance();
    lexeme.push_back(value);
  } else {
    lx.advance();
    lexeme.push_back('\\');
    const char escape = lx.advance();
    lexeme.push_back(escape);
    if (escape == 'n')
      value = '\n';
    else if (escape == 't')
      value = '\t';
    else
      value = escape;  // '\\', '\'' and any unrecognised escape map to themselves
  }

  if (lx.peek() != '\'')
    throw LexerError("Unterminated char literal");
  lx.advance();
  lexeme.push_back('\'');

  return std::make_optional(TokenFactory::make_char_literal(std::move(lexeme), value, lx.line(), lx.token_col()));
}
12 changes: 12 additions & 0 deletions lib/lexer/handlers/CharHandler.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#ifndef CHARHANDLER_HPP_
#define CHARHANDLER_HPP_

#include "Handler.hpp"
#include "lib/lexer/Lexer.hpp"
#include "lib/lexer/LexerError.hpp"

// Handler for character literals; the default table registers it for '\''.
struct CharHandler : public Handler {
  OptToken scan(Lexer &lx) override;
};

#endif // CHARHANDLER_HPP_
6 changes: 6 additions & 0 deletions lib/lexer/handlers/DefaultHandler.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#include "DefaultHandler.hpp"

// Fallback for characters with no registered handler: never produces a token,
// always throws LexerError naming the offending (already consumed) character.
OptToken DefaultHandler::scan(Lexer &lx) {
  std::string message("Unexpected character: ");
  message.push_back(lx.current_char());
  throw LexerError(message);
}
11 changes: 11 additions & 0 deletions lib/lexer/handlers/DefaultHandler.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#ifndef DEFAULTHANDLER_HPP_
#define DEFAULTHANDLER_HPP_

#include "Handler.hpp"
#include "lib/lexer/Lexer.hpp"

// Handler the lexer falls back to when a byte has no dedicated handler.
struct DefaultHandler : public Handler {
  OptToken scan(Lexer &lx) override;
};

#endif // DEFAULTHANDLER_HPP_
18 changes: 18 additions & 0 deletions lib/lexer/handlers/Handler.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#ifndef HANDLER_HPP_
#define HANDLER_HPP_

#include <memory>
#include <optional>

// Forward declarations: handlers only need these types by reference/pointer here.
class Token;
class Lexer;

// Owning pointer to a produced token.
using TokenPtr = std::unique_ptr<Token>;
// Result of a scan. The lexer ignores a result that is an empty optional or
// that holds a null pointer, so either can be used to produce no token.
using OptToken = std::optional<TokenPtr>;

// Interface for per-character scanners. The lexer consumes the first character
// of a token, then calls scan() on the handler registered for that character.
struct Handler {
virtual ~Handler() = default;
// Scans the rest of the token through `lx` and returns the resulting token, if any.
virtual OptToken scan(Lexer &lx) = 0;
};

#endif // HANDLER_HPP_
18 changes: 18 additions & 0 deletions lib/lexer/handlers/IdentifierHandler.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#include "IdentifierHandler.hpp"

// Scans an identifier-like word. The first character was already consumed by
// the lexer; the rest is every following alphanumeric character or '_'.
// Classification, in order:
//   - keyword "true"/"false" -> bool literal
//   - any other keyword      -> keyword token
//   - the word "xor"         -> operator token
//   - everything else        -> identifier token
OptToken IdentifierHandler::scan(Lexer &lx) {
  const auto is_word_char = [](char ch) {
    return ch == '_' || std::isalnum(static_cast<unsigned char>(ch));
  };

  std::string word(1, lx.current_char());
  lx.consume_while(word, is_word_char);

  if (!lx.is_keyword(word)) {
    if (word == "xor")
      return std::make_optional(TokenFactory::make_operator(std::move(word), lx.line(), lx.token_col()));
    return std::make_optional(TokenFactory::make_ident(std::move(word), lx.line(), lx.token_col()));
  }

  const bool is_true = word == "true";
  if (is_true || word == "false")
    return std::make_optional(TokenFactory::make_bool_literal(word, is_true, lx.line(), lx.token_col()));

  return std::make_optional(TokenFactory::make_keyword(std::move(word), lx.line(), lx.token_col()));
}
11 changes: 11 additions & 0 deletions lib/lexer/handlers/IdentifierHandler.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#ifndef IDENTIFIERHANDLER_HPP_
#define IDENTIFIERHANDLER_HPP_

#include "Handler.hpp"
#include "lib/lexer/Lexer.hpp"

// Handler for identifiers and keywords; the default table registers it for
// ASCII letters and '_'.
struct IdentifierHandler : public Handler {
  OptToken scan(Lexer &lx) override;
};

#endif // IDENTIFIERHANDLER_HPP_
5 changes: 5 additions & 0 deletions lib/lexer/handlers/NewlineHandler.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#include "NewlineHandler.hpp"

// Emits a newline token positioned at the line break itself.
//
// By the time scan() runs, Lexer::tokenize() has already consumed the '\n'
// through advance(), which bumps the lexer's line counter. lx.line() is thus
// the line *after* the break, while lx.token_col() is still the column of the
// '\n' on the line it terminates. Reporting lx.line() unchanged paired a
// column from one line with the number of the next, so subtract one to get
// the line the newline character actually sits on.
OptToken NewlineHandler::scan(Lexer &lx) {
  return std::make_optional(TokenFactory::make_newline(lx.line() - 1, lx.token_col()));
}
11 changes: 11 additions & 0 deletions lib/lexer/handlers/NewlineHandler.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#ifndef NEWLINEHANDLER_HPP_
#define NEWLINEHANDLER_HPP_

#include "Handler.hpp"
#include "lib/lexer/Lexer.hpp"

// Handler for line breaks; the default table registers it for '\n'.
struct NewlineHandler : public Handler {
  OptToken scan(Lexer &lx) override;
};

#endif // NEWLINEHANDLER_HPP_
Loading
Loading