From ca1cb41fff2d0bdcd7c006cf9315404ca98f27d2 Mon Sep 17 00:00:00 2001 From: biqiboqi Date: Mon, 13 Oct 2025 01:28:24 +0300 Subject: [PATCH 1/2] feat: add lexer implementation --- bin/main.cpp | 26 ++++- lib/lexer/Lexer.cpp | 131 +++++++++++++++++++++++ lib/lexer/Lexer.hpp | 75 +++++++++++++ lib/lexer/LexerError.hpp | 11 ++ lib/lexer/handlers/CharHandler.cpp | 40 +++++++ lib/lexer/handlers/CharHandler.hpp | 12 +++ lib/lexer/handlers/DefaultHandler.cpp | 6 ++ lib/lexer/handlers/DefaultHandler.hpp | 11 ++ lib/lexer/handlers/Handler.hpp | 18 ++++ lib/lexer/handlers/IdentifierHandler.cpp | 18 ++++ lib/lexer/handlers/IdentifierHandler.hpp | 11 ++ lib/lexer/handlers/NewlineHandler.cpp | 5 + lib/lexer/handlers/NewlineHandler.hpp | 11 ++ lib/lexer/handlers/NumberHandler.cpp | 73 +++++++++++++ lib/lexer/handlers/NumberHandler.hpp | 11 ++ lib/lexer/handlers/OperatorHandler.cpp | 15 +++ lib/lexer/handlers/OperatorHandler.hpp | 11 ++ lib/lexer/handlers/PunctHandler.cpp | 6 ++ lib/lexer/handlers/PunctHandler.hpp | 11 ++ lib/lexer/handlers/SlashHandler.cpp | 32 ++++++ lib/lexer/handlers/SlashHandler.hpp | 11 ++ lib/lexer/handlers/StringHandler.cpp | 43 ++++++++ lib/lexer/handlers/StringHandler.hpp | 11 ++ lib/lexer/handlers/WhitespaceHandler.cpp | 12 +++ lib/lexer/handlers/WhitespaceHandler.hpp | 11 ++ lib/lexer/tokens/CommentToken.hpp | 42 ++++++++ lib/lexer/tokens/EofToken.hpp | 42 ++++++++ lib/lexer/tokens/IdentToken.hpp | 46 ++++++++ lib/lexer/tokens/KeywordToken.hpp | 41 +++++++ lib/lexer/tokens/LiteralToken.hpp | 55 ++++++++++ lib/lexer/tokens/NewlineToken.hpp | 41 +++++++ lib/lexer/tokens/OperatorToken.hpp | 42 ++++++++ lib/lexer/tokens/PunctToken.hpp | 44 ++++++++ lib/lexer/tokens/Token.hpp | 37 +++++++ lib/lexer/tokens/TokenFactory.hpp | 67 ++++++++++++ lib/lexer/tokens/TokenType.hpp | 43 ++++++++ lib/lexer/tokens/TokenVisitor.hpp | 25 +++++ lib/lexer/utils.cpp | 15 +++ lib/lexer/utils.hpp | 10 ++ lib/lexer/values/BoolValue.hpp | 24 +++++ lib/lexer/values/CharValue.hpp 
| 24 +++++ lib/lexer/values/FloatValue.hpp | 27 +++++ lib/lexer/values/IntValue.hpp | 23 ++++ lib/lexer/values/StringValue.hpp | 23 ++++ lib/lexer/values/Value.hpp | 14 +++ 45 files changed, 1303 insertions(+), 4 deletions(-) create mode 100644 lib/lexer/Lexer.cpp create mode 100644 lib/lexer/Lexer.hpp create mode 100644 lib/lexer/LexerError.hpp create mode 100644 lib/lexer/handlers/CharHandler.cpp create mode 100644 lib/lexer/handlers/CharHandler.hpp create mode 100644 lib/lexer/handlers/DefaultHandler.cpp create mode 100644 lib/lexer/handlers/DefaultHandler.hpp create mode 100644 lib/lexer/handlers/Handler.hpp create mode 100644 lib/lexer/handlers/IdentifierHandler.cpp create mode 100644 lib/lexer/handlers/IdentifierHandler.hpp create mode 100644 lib/lexer/handlers/NewlineHandler.cpp create mode 100644 lib/lexer/handlers/NewlineHandler.hpp create mode 100644 lib/lexer/handlers/NumberHandler.cpp create mode 100644 lib/lexer/handlers/NumberHandler.hpp create mode 100644 lib/lexer/handlers/OperatorHandler.cpp create mode 100644 lib/lexer/handlers/OperatorHandler.hpp create mode 100644 lib/lexer/handlers/PunctHandler.cpp create mode 100644 lib/lexer/handlers/PunctHandler.hpp create mode 100644 lib/lexer/handlers/SlashHandler.cpp create mode 100644 lib/lexer/handlers/SlashHandler.hpp create mode 100644 lib/lexer/handlers/StringHandler.cpp create mode 100644 lib/lexer/handlers/StringHandler.hpp create mode 100644 lib/lexer/handlers/WhitespaceHandler.cpp create mode 100644 lib/lexer/handlers/WhitespaceHandler.hpp create mode 100644 lib/lexer/tokens/CommentToken.hpp create mode 100644 lib/lexer/tokens/EofToken.hpp create mode 100644 lib/lexer/tokens/IdentToken.hpp create mode 100644 lib/lexer/tokens/KeywordToken.hpp create mode 100644 lib/lexer/tokens/LiteralToken.hpp create mode 100644 lib/lexer/tokens/NewlineToken.hpp create mode 100644 lib/lexer/tokens/OperatorToken.hpp create mode 100644 lib/lexer/tokens/PunctToken.hpp create mode 100644 lib/lexer/tokens/Token.hpp 
create mode 100644 lib/lexer/tokens/TokenFactory.hpp
create mode 100644 lib/lexer/tokens/TokenType.hpp
create mode 100644 lib/lexer/tokens/TokenVisitor.hpp
create mode 100644 lib/lexer/utils.cpp
create mode 100644 lib/lexer/utils.hpp
create mode 100644 lib/lexer/values/BoolValue.hpp
create mode 100644 lib/lexer/values/CharValue.hpp
create mode 100644 lib/lexer/values/FloatValue.hpp
create mode 100644 lib/lexer/values/IntValue.hpp
create mode 100644 lib/lexer/values/StringValue.hpp
create mode 100644 lib/lexer/values/Value.hpp

diff --git a/bin/main.cpp b/bin/main.cpp
index b5daa50..8d9ff6a 100644
--- a/bin/main.cpp
+++ b/bin/main.cpp
@@ -1,8 +1,27 @@
 #include <iostream>
 
-#include "lib/ui/ui_functions.hpp"
+#include "lib/lexer/Lexer.hpp"
 
-int main(int32_t argc, char** argv) {
-  std::vector<std::string> args = std::vector<std::string>(argv, argv + argc);
-  return StartConsoleUI(args, std::cout);
+// Demo driver: lexes an embedded sample program and prints one token per line.
+int main() {
+  const std::string sample = R"ovum(
+// demo
+fun Main(args: StringArray): Int {
+    val count: Int = args.Length()
+    sys::Print("Args count: " + count.ToString())
+    return 0
+}
+)ovum";
+
+  Lexer lx(sample, false);
+  try {
+    auto toks = lx.tokenize();
+    for (auto &t : toks) {
+      std::cout << t->to_string() << "\n";
+    }
+  } catch (const std::exception &e) {
+    std::cerr << "Lexer error: " << e.what() << "\n";
+    return 1;
+  }
+  return 0;
 }
diff --git a/lib/lexer/Lexer.cpp b/lib/lexer/Lexer.cpp
new file mode 100644
index 0000000..84b7c8c
--- /dev/null
+++ b/lib/lexer/Lexer.cpp
@@ -0,0 +1,145 @@
+#include "Lexer.hpp"
+#include "utils.hpp"
+
+// Binds the lexer to `src` (held as a non-owning view) and installs the default handlers.
+Lexer::Lexer(std::string_view src, bool keep_comments) :
+    src_(src), keep_comments_(keep_comments), start_(0), current_(0), line_(1), col_(1), token_col_(1) {
+  register_defaults();
+}
+
+// True once every character of the source has been consumed.
+bool Lexer::is_at_end() const noexcept {
+  return current_ >= src_.size();
+}
+
+// Returns the character `offset` positions past the cursor without consuming it ('\0' past the end).
+char Lexer::peek(size_t offset) const noexcept {
+  const size_t idx = current_ + offset;
+  if (idx >= src_.size())
+    return '\0';
+  return src_[idx];
+}
+
+// Returns the most recently consumed character ('\0' if nothing has been consumed yet).
+char Lexer::current_char() const noexcept {
+  if (current_ == 0)
+    return '\0';
+  return src_[current_ - 1];
+}
+
+// Consumes one character, keeping line_/col_ in step; returns '\0' at end of input.
+char Lexer::advance() {
+  if (is_at_end())
+    return '\0';
+  const char c = src_[current_++];
+  if (c == '\n') {
+    ++line_;
+    col_ = 1;
+  } else {
+    ++col_;
+  }
+  return c;
+}
+
+// Un-consumes the last character and restores the line/column it was read at.
+void Lexer::retreat_one() {
+  if (current_ == 0)
+    return;
+  --current_;
+  // Only stepping back over a newline changes the line number.
+  if (src_[current_] == '\n')
+    --line_;
+  // The column is the 1-based distance from the start of the line. The previous loop reported
+  // it one short on the first line (no preceding newline), skewing every later column.
+  const size_t prev_newline = src_.substr(0, current_).rfind('\n');
+  const size_t line_start = (prev_newline == std::string_view::npos) ? 0 : prev_newline + 1;
+  col_ = static_cast<int>(current_ - line_start) + 1;
+}
+
+// Consumes characters into `out` for as long as `pred` accepts the next one.
+void Lexer::consume_while(std::string &out, const std::function<bool(char)> &pred) {
+  while (!is_at_end() && pred(peek())) {
+    out.push_back(advance());
+  }
+}
+
+// Returns the source text from the start of the current token up to the cursor.
+std::string Lexer::raw_lexeme() const {
+  if (current_ >= start_)
+    return std::string(src_.substr(start_, current_ - start_));
+  return {};
+}
+
+// Membership test against keyword_set().
+bool Lexer::is_keyword(std::string_view s) const {
+  return keyword_set().contains(std::string(s));
+}
+
+// Membership test against multi_ops_set().
+bool Lexer::is_multiop(std::string_view s) const {
+  return multi_ops_set().contains(std::string(s));
+}
+
+// Lexes the whole source. Each iteration consumes one character, dispatches on it, and keeps
+// the token the handler returns, if any. The result always ends with an EOF token.
+std::vector<TokenPtr> Lexer::tokenize() {
+  std::vector<TokenPtr> tokens;
+  while (!is_at_end()) {
+    start_ = current_;
+    token_col_ = col_;
+    const char c = advance();
+    // Index through unsigned char so that bytes >= 0x80 cannot yield a negative index.
+    Handler *h = handlers_[static_cast<unsigned char>(c)].get();
+    if (!h)
+      h = default_handler_.get();
+    OptToken maybe = h->scan(*this);
+    if (maybe && *maybe)
+      tokens.push_back(std::move(*maybe));
+  }
+  tokens.push_back(TokenFactory::make_eof(line_, col_));
+  return tokens;
+}
+
+// Clears the dispatch table and installs the built-in handlers. A later set_handler() call for
+// the same character replaces the earlier one, so the order below matters.
+// NOTE(review): the make_unique<> arguments were lost in this copy; restored from context.
+void Lexer::register_defaults() {
+  for (auto &p : handlers_)
+    p.reset();
+  default_handler_.reset();
+
+  set_handler(' ', std::make_unique<WhitespaceHandler>());
+  set_handler('\t', std::make_unique<WhitespaceHandler>());
+  set_handler('\r', std::make_unique<WhitespaceHandler>());
+
+  set_handler('\n', std::make_unique<NewlineHandler>());
+
+  for (unsigned char c = 'a'; c <= 'z'; ++c)
+    set_handler(c, std::make_unique<IdentifierHandler>());
+  for (unsigned char c = 'A'; c <= 'Z'; ++c)
+    set_handler(c, std::make_unique<IdentifierHandler>());
+  set_handler('_', std::make_unique<IdentifierHandler>());
+
+  for (unsigned char d = '0'; d <= '9'; ++d)
+    set_handler(d, std::make_unique<NumberHandler>());
+
+  set_handler('"', std::make_unique<StringHandler>());
+  set_handler('\'', std::make_unique<CharHandler>());
+
+  // '.' stays with OperatorHandler. The NumberHandler registration that used to precede
+  // this loop was dead code: the loop replaced it immediately.
+  const std::string opchars = "+-*/%<>=!&|^~?:.";
+  for (unsigned char c : opchars)
+    set_handler(c, std::make_unique<OperatorHandler>());
+
+  // Registered after the operator loop on purpose: opchars contains '/', so the earlier
+  // SlashHandler registration was overwritten and comments were lexed as operators.
+  set_handler('/', std::make_unique<SlashHandler>());
+
+  // ':' is also listed in opchars; this later registration is the one that takes effect.
+  const std::string puncts = ",;:(){}[]";
+  for (unsigned char c : puncts)
+    set_handler(c, std::make_unique<PunctHandler>());
+
+  set_default_handler(std::make_unique<DefaultHandler>());
+}
diff --git a/lib/lexer/Lexer.hpp b/lib/lexer/Lexer.hpp
new file mode 100644
index 0000000..d502b54
--- /dev/null
+++ b/lib/lexer/Lexer.hpp
@@ -0,0 +1,82 @@
+#ifndef LEXER_HPP_
+#define LEXER_HPP_
+
+// NOTE(review): the standard header names were lost in this copy; restored from usage — confirm.
+#include <array>
+#include <cctype>
+#include <functional>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "handlers/CharHandler.hpp"
+#include "handlers/DefaultHandler.hpp"
+#include "handlers/Handler.hpp"
+#include "handlers/IdentifierHandler.hpp"
+#include "handlers/NewlineHandler.hpp"
+#include "handlers/NumberHandler.hpp"
+#include "handlers/OperatorHandler.hpp"
+#include "handlers/PunctHandler.hpp"
+#include "handlers/SlashHandler.hpp"
+#include "handlers/StringHandler.hpp"
+#include "handlers/WhitespaceHandler.hpp"
+
+#include "tokens/TokenFactory.hpp"
+
+// Table-driven lexer: tokenize() dispatches on each token's first byte to a Handler.
+// `src` is kept as a non-owning std::string_view, so the buffer must outlive the Lexer.
+class Lexer {
+public:
+  explicit Lexer(std::string_view src, bool keep_comments = false);
+
+  // Lexes the whole source and returns the tokens, terminated by an EOF token.
+  std::vector<TokenPtr> tokenize();
+
+  // Cursor primitives used by the handlers.
+  bool is_at_end() const noexcept;
+  char peek(size_t offset = 0) const noexcept;
+  char current_char() const noexcept;
+  char advance();
+  void retreat_one();
+  void consume_while(std::string &out, const std::function<bool(char)> &pred);
+  std::string raw_lexeme() const;
+
+  int line() const noexcept {
+    return line_;
+  }
+  int token_col() const noexcept {
+    return token_col_;
+  }
+  bool keep_comments() const noexcept {
+    return keep_comments_;
+  }
+  bool is_keyword(std::string_view s) const;
+  bool is_multiop(std::string_view s) const;
+
+  // Installs `handler` for byte `c`, replacing any handler registered earlier.
+  void set_handler(unsigned char c, std::unique_ptr<Handler> handler) {
+    handlers_[c] = std::move(handler);
+  }
+  // Installs the fallback used for bytes that have no dedicated handler.
+  void set_default_handler(std::unique_ptr<Handler> handler) {
+    default_handler_ = std::move(handler);
+  }
+
+private:
+  void register_defaults();
+
+  std::string_view src_;
+  bool keep_comments_;
+
+  size_t start_{0};    // index of the first character of the token being scanned
+  size_t current_{0};  // index of the next unread character
+  int line_{1};
+  int col_{1};
+  int token_col_{1};   // column at which the token being scanned started
+
+  std::array<std::unique_ptr<Handler>, 256> handlers_;
+  std::unique_ptr<Handler> default_handler_;
+};
+
+#endif // LEXER_HPP_
diff --git a/lib/lexer/LexerError.hpp b/lib/lexer/LexerError.hpp
new file mode 100644
index 0000000..0c55e8a
--- /dev/null
+++ b/lib/lexer/LexerError.hpp
@@ -0,0 +1,12 @@
+#ifndef LEXERERROR_HPP_
+#define LEXERERROR_HPP_
+
+#include <stdexcept>
+
+// Exception type thrown by the handlers when the input cannot be lexed.
+class LexerError : public std::runtime_error {
+public:
+  using std::runtime_error::runtime_error;
+};
+
+#endif // LEXERERROR_HPP_
diff --git a/lib/lexer/handlers/CharHandler.cpp b/lib/lexer/handlers/CharHandler.cpp
new file mode 100644
index 0000000..a673b0f
--- /dev/null
+++ b/lib/lexer/handlers/CharHandler.cpp
@@ -0,0 +1,38 @@
+#include "CharHandler.hpp"
+
+// Scans a character literal; the lexer has already consumed the opening quote.
+// Recognises the escapes \n \t \r \\ \' (any other escaped character stands for itself).
+// Throws LexerError when the closing quote is missing.
+OptToken CharHandler::scan(Lexer &lx) {
+  std::string raw;
+  raw.push_back('\'');
+  char val = '\0';
+  if (lx.peek() == '\\') {
+    lx.advance();
+    raw.push_back('\\');
+    const char e = lx.advance();
+    raw.push_back(e);
+    switch (e) {
+      case 'n':
+        val = '\n';
+        break;
+      case 't':
+        val = '\t';
+        break;
+      case 'r':  // was missing: StringHandler maps \r to CR, so char literals must too
+        val = '\r';
+        break;
+      default:  // \\ and \' (and unknown escapes) denote the escaped character itself
+        val = e;
+        break;
+    }
+  } else {
+    val = lx.advance();
+    raw.push_back(val);
+  }
+  if (lx.peek() != '\'')
+    throw LexerError("Unterminated char literal");
+  lx.advance();
+  raw.push_back('\'');
+  return std::make_optional(TokenFactory::make_char_literal(std::move(raw), val, lx.line(), lx.token_col()));
+}
diff --git a/lib/lexer/handlers/CharHandler.hpp b/lib/lexer/handlers/CharHandler.hpp
new file mode 100644
index 0000000..7692230
--- /dev/null
+++
b/lib/lexer/handlers/CharHandler.hpp
@@ -0,0 +1,13 @@
+#ifndef CHARHANDLER_HPP_
+#define CHARHANDLER_HPP_
+
+#include "Handler.hpp"
+#include "lib/lexer/Lexer.hpp"
+#include "lib/lexer/LexerError.hpp"
+
+// Handler that scans character literals.
+struct CharHandler : Handler {
+  OptToken scan(Lexer &lx) override;
+};
+
+#endif // CHARHANDLER_HPP_
diff --git a/lib/lexer/handlers/DefaultHandler.cpp b/lib/lexer/handlers/DefaultHandler.cpp
new file mode 100644
index 0000000..745428f
--- /dev/null
+++ b/lib/lexer/handlers/DefaultHandler.cpp
@@ -0,0 +1,11 @@
+#include "DefaultHandler.hpp"
+
+#include <string>
+
+#include "lib/lexer/LexerError.hpp"
+
+// Fallback for characters without a dedicated handler: always throws LexerError naming the character.
+OptToken DefaultHandler::scan(Lexer &lx) {
+  const char c = lx.current_char();
+  throw LexerError(std::string("Unexpected character: ") + c);
+}
diff --git a/lib/lexer/handlers/DefaultHandler.hpp b/lib/lexer/handlers/DefaultHandler.hpp
new file mode 100644
index 0000000..b7f635a
--- /dev/null
+++ b/lib/lexer/handlers/DefaultHandler.hpp
@@ -0,0 +1,12 @@
+#ifndef DEFAULTHANDLER_HPP_
+#define DEFAULTHANDLER_HPP_
+
+#include "Handler.hpp"
+#include "lib/lexer/Lexer.hpp"
+
+// Handler used when no other handler is registered for a character; it rejects the input.
+struct DefaultHandler : Handler {
+  OptToken scan(Lexer &lx) override;
+};
+
+#endif // DEFAULTHANDLER_HPP_
diff --git a/lib/lexer/handlers/Handler.hpp b/lib/lexer/handlers/Handler.hpp
new file mode 100644
index 0000000..66dba14
--- /dev/null
+++ b/lib/lexer/handlers/Handler.hpp
@@ -0,0 +1,22 @@
+#ifndef HANDLER_HPP_
+#define HANDLER_HPP_
+
+// NOTE(review): header names were lost in this copy of the patch; restored from usage — confirm.
+#include <memory>
+#include <optional>
+
+class Token;
+class Lexer;
+
+// A handler returns std::nullopt when the text it scanned produces no token.
+using TokenPtr = std::unique_ptr<Token>;
+using OptToken = std::optional<TokenPtr>;
+
+// Interface for per-character scanners. Lexer::tokenize() calls scan() right after consuming
+// the character that selected the handler.
+struct Handler {
+  virtual ~Handler() = default;
+  virtual OptToken scan(Lexer &lx) = 0;
+};
+
+#endif // HANDLER_HPP_
diff --git a/lib/lexer/handlers/IdentifierHandler.cpp b/lib/lexer/handlers/IdentifierHandler.cpp
new file mode 100644
index 0000000..74d7c23
--- /dev/null
+++ b/lib/lexer/handlers/IdentifierHandler.cpp
@@ -0,0 +1,24 @@
+#include "IdentifierHandler.hpp"
+
+#include <cctype>
+#include <string>
+
+// Scans a word (the consumed first character plus any following letters, digits or '_') and
+// classifies it: the keywords "true"/"false" become bool literal tokens, other keywords
+// keyword tokens, "xor" an operator token, and everything else an identifier token.
+OptToken IdentifierHandler::scan(Lexer &lx) {
+  std::string s(1, lx.current_char());
+  lx.consume_while(s, [](char ch) { return std::isalnum(static_cast<unsigned char>(ch)) || ch == '_'; });
+
+  if (lx.is_keyword(s)) {
+    if (s == "true" || s == "false") {
+      const bool value = (s == "true");
+      return std::make_optional(TokenFactory::make_bool_literal(std::move(s), value, lx.line(), lx.token_col()));
+    }
+    return std::make_optional(TokenFactory::make_keyword(std::move(s), lx.line(), lx.token_col()));
+  }
+  if (s == "xor") {
+    return std::make_optional(TokenFactory::make_operator(std::move(s), lx.line(), lx.token_col()));
+  }
+  return std::make_optional(TokenFactory::make_ident(std::move(s), lx.line(), lx.token_col()));
+}
diff --git a/lib/lexer/handlers/IdentifierHandler.hpp b/lib/lexer/handlers/IdentifierHandler.hpp
new file mode 100644
index 0000000..6099fce
--- /dev/null
+++ b/lib/lexer/handlers/IdentifierHandler.hpp
@@ -0,0 +1,12 @@
+#ifndef IDENTIFIERHANDLER_HPP_
+#define IDENTIFIERHANDLER_HPP_
+
+#include "Handler.hpp"
+#include "lib/lexer/Lexer.hpp"
+
+// Handler that scans identifiers, keywords, bool literals and the word operator "xor".
+struct IdentifierHandler : Handler {
+  OptToken scan(Lexer &lx) override;
+};
+
+#endif // IDENTIFIERHANDLER_HPP_
diff --git a/lib/lexer/handlers/NewlineHandler.cpp b/lib/lexer/handlers/NewlineHandler.cpp
new file mode 100644
index 0000000..e505128
--- /dev/null
+++ b/lib/lexer/handlers/NewlineHandler.cpp
@@ -0,0 +1,8 @@
+#include "NewlineHandler.hpp"
+
+// Emits a NEWLINE token for the '\n' the lexer has just consumed.
+OptToken NewlineHandler::scan(Lexer &lx) {
+  // advance() has already moved line() on to the following line, so step back one to report
+  // the line the newline character is actually on (token_col() still refers to that line).
+  return std::make_optional(TokenFactory::make_newline(lx.line() - 1, lx.token_col()));
+}
diff --git a/lib/lexer/handlers/NewlineHandler.hpp b/lib/lexer/handlers/NewlineHandler.hpp
new file mode 100644
index 0000000..8f60524
--- /dev/null
+++ b/lib/lexer/handlers/NewlineHandler.hpp
@@ -0,0 +1,12 @@
+#ifndef NEWLINEHANDLER_HPP_
+#define NEWLINEHANDLER_HPP_
+
+#include "Handler.hpp"
+#include "lib/lexer/Lexer.hpp"
+
+// Handler that turns a consumed '\n' into a NEWLINE token.
+struct NewlineHandler : Handler {
+  OptToken scan(Lexer &lx) override;
+};
+
+#endif // NEWLINEHANDLER_HPP_
diff --git a/lib/lexer/handlers/NumberHandler.cpp b/lib/lexer/handlers/NumberHandler.cpp
new file mode 100644
index 0000000..f6de112
--- /dev/null
+++
b/lib/lexer/handlers/NumberHandler.cpp
@@ -0,0 +1,81 @@
+#include "NumberHandler.hpp"
+
+#include <cctype>
+#include <stdexcept>
+#include <string>
+#include <utility>
+
+#include "lib/lexer/LexerError.hpp"
+
+namespace {
+
+bool is_digit_char(char c) {
+  return std::isdigit(static_cast<unsigned char>(c)) != 0;
+}
+
+// Appends the run of decimal digits at the cursor to `raw`.
+void consume_digits(Lexer &lx, std::string &raw) {
+  lx.consume_while(raw, is_digit_char);
+}
+
+// Consumes an optional exponent ('e' or 'E', optional sign, digits) into `raw`.
+// Returns false without consuming anything when no exponent marker follows; throws
+// LexerError when the marker (and sign) is not followed by at least one digit.
+bool consume_exponent(Lexer &lx, std::string &raw) {
+  if (lx.peek() != 'e' && lx.peek() != 'E')
+    return false;
+  raw.push_back(lx.advance());
+  if (lx.peek() == '+' || lx.peek() == '-')
+    raw.push_back(lx.advance());
+  if (!is_digit_char(lx.peek()))
+    throw LexerError("Malformed exponent");
+  consume_digits(lx, raw);
+  return true;
+}
+
+// Converts `raw` with std::stold and wraps the result in a float literal token.
+OptToken make_float(Lexer &lx, std::string raw) {
+  long double v = 0;
+  try {
+    v = std::stold(raw);
+  } catch (const std::logic_error &) {  // std::invalid_argument or std::out_of_range
+    throw LexerError(std::string("Malformed float literal: ") + raw);
+  }
+  return std::make_optional(TokenFactory::make_float_literal(std::move(raw), v, lx.line(), lx.token_col()));
+}
+
+} // namespace
+
+// Scans an integer or floating-point literal whose first character (a digit or '.') has
+// already been consumed. Forms: digits, digits '.' digits, '.' digits, each float form with an
+// optional exponent, and digits followed directly by an exponent. Throws LexerError on failure.
+OptToken NumberHandler::scan(Lexer &lx) {
+  // Seed the lexeme with the character that selected this handler instead of calling
+  // retreat_one(), which rescans the source from the beginning on every call.
+  std::string raw(1, lx.current_char());
+
+  if (raw.front() == '.') {  // leading-dot form such as ".5"
+    consume_digits(lx, raw);
+    consume_exponent(lx, raw);
+    return make_float(lx, std::move(raw));
+  }
+
+  consume_digits(lx, raw);
+  // A '.' belongs to the number only when a digit follows; otherwise it is left unconsumed.
+  if (lx.peek() == '.' && is_digit_char(lx.peek(1))) {
+    raw.push_back(lx.advance());
+    consume_digits(lx, raw);
+    consume_exponent(lx, raw);
+    return make_float(lx, std::move(raw));
+  }
+  if (consume_exponent(lx, raw))
+    return make_float(lx, std::move(raw));
+
+  long long vi = 0;
+  try {
+    vi = std::stoll(raw);
+  } catch (const std::logic_error &) {  // std::invalid_argument or std::out_of_range
+    throw LexerError(std::string("Malformed integer literal: ") + raw);
+  }
+  return std::make_optional(TokenFactory::make_int_literal(std::move(raw), vi, lx.line(), lx.token_col()));
+}
diff --git a/lib/lexer/handlers/NumberHandler.hpp b/lib/lexer/handlers/NumberHandler.hpp
new file mode 100644
index 0000000..638bdb4
--- /dev/null
+++ b/lib/lexer/handlers/NumberHandler.hpp
@@ -0,0 +1,12 @@
+#ifndef NUMBERHANDLER_HPP_
+#define NUMBERHANDLER_HPP_
+
+#include "Handler.hpp"
+#include "lib/lexer/Lexer.hpp"
+
+// Handler that scans integer and floating-point literals.
+struct NumberHandler : Handler {
+  OptToken scan(Lexer &lx) override;
+};
+
+#endif // NUMBERHANDLER_HPP_
diff --git a/lib/lexer/handlers/OperatorHandler.cpp b/lib/lexer/handlers/OperatorHandler.cpp
new file mode 100644
index 0000000..d53e3cb
--- /dev/null
+++ b/lib/lexer/handlers/OperatorHandler.cpp
@@ -0,0 +1,18 @@
+#include "OperatorHandler.hpp"
+
+#include <string>
+#include <utility>
+
+// Scans a one- or two-character operator; the first character has already been consumed.
+// The two-character form is taken only when Lexer::is_multiop() accepts it.
+OptToken OperatorHandler::scan(Lexer &lx) {
+  std::string op(1, lx.current_char());
+  if (const char next = lx.peek(); next != '\0') {
+    std::string two = op + next;
+    if (lx.is_multiop(two)) {
+      lx.advance();
+      op = std::move(two);
+    }
+  }
+  return std::make_optional(TokenFactory::make_operator(std::move(op), lx.line(), lx.token_col()));
+}
diff --git a/lib/lexer/handlers/OperatorHandler.hpp b/lib/lexer/handlers/OperatorHandler.hpp
new file mode 100644
index 0000000..fbb30d3
--- /dev/null
+++ b/lib/lexer/handlers/OperatorHandler.hpp
@@ -0,0 +1,12 @@
+#ifndef OPERATORHANDLER_HPP_
+#define OPERATORHANDLER_HPP_
+
+#include "Handler.hpp"
+#include "lib/lexer/Lexer.hpp"
+
+// Handler that scans one- and two-character operators.
+struct OperatorHandler : Handler {
+  OptToken scan(Lexer &lx) override;
+};
+
+#endif // OPERATORHANDLER_HPP_
diff --git a/lib/lexer/handlers/PunctHandler.cpp b/lib/lexer/handlers/PunctHandler.cpp
new file mode 100644
index 0000000..dbdbd27
--- /dev/null
+++ b/lib/lexer/handlers/PunctHandler.cpp
@@ -0,0 +1,6 @@
+#include "PunctHandler.hpp"
+
+// Emits a punctuation token for the single character the lexer has just consumed.
+OptToken PunctHandler::scan(Lexer &lx) {
+  return std::make_optional(TokenFactory::make_punct(lx.current_char(), lx.line(), lx.token_col()));
+}
diff --git a/lib/lexer/handlers/PunctHandler.hpp b/lib/lexer/handlers/PunctHandler.hpp
new file mode 100644
index 0000000..e0a9cfe
--- /dev/null
+++ b/lib/lexer/handlers/PunctHandler.hpp
@@ -0,0 +1,12 @@
+#ifndef PUNCTHANDLER_HPP_
+#define PUNCTHANDLER_HPP_
+
+#include "Handler.hpp"
+#include "lib/lexer/Lexer.hpp"
+
+// Handler that turns a single punctuation character into a token.
+struct PunctHandler : Handler {
+  OptToken scan(Lexer &lx) override;
+};
+
+#endif // PUNCTHANDLER_HPP_
diff --git a/lib/lexer/handlers/SlashHandler.cpp b/lib/lexer/handlers/SlashHandler.cpp
new file mode 100644
index 0000000..9680147
--- /dev/null
+++ b/lib/lexer/handlers/SlashHandler.cpp
@@ -0,0 +1,51 @@
+#include "SlashHandler.hpp"
+
+#include <string>
+#include <utility>
+
+#include "lib/lexer/LexerError.hpp"
+
+// Scans whatever starts with '/': a line comment, a block comment, or an operator.
+// Comments yield a COMMENT token only when Lexer::keep_comments() is set; otherwise nothing.
+// Throws LexerError for a block comment that is never closed.
+OptToken SlashHandler::scan(Lexer &lx) {
+  // Remember where the token starts: a block comment may span lines, and line() moves on.
+  const int start_line = lx.line();
+
+  if (lx.peek() == '/') {
+    lx.advance();  // drop the second '/' so the text excludes the delimiter, as for /* */
+    std::string txt;
+    while (!lx.is_at_end() && lx.peek() != '\n')
+      txt.push_back(lx.advance());
+    if (lx.keep_comments())
+      return std::make_optional(TokenFactory::make_comment(std::move(txt), start_line, lx.token_col()));
+    return std::nullopt;
+  }
+
+  if (lx.peek() == '*') {
+    lx.advance();
+    std::string txt;
+    bool closed = false;
+    while (!lx.is_at_end()) {
+      const char c = lx.advance();
+      if (c == '*' && lx.peek() == '/') {
+        lx.advance();
+        closed = true;
+        break;
+      }
+      txt.push_back(c);
+    }
+    if (!closed)
+      throw LexerError("Unterminated block comment");
+    if (lx.keep_comments())
+      return std::make_optional(TokenFactory::make_comment(std::move(txt), start_line, lx.token_col()));
+    return std::nullopt;
+  }
+
+  // Plain operator: take a two-character form only if Lexer::is_multiop() recognises it.
+  std::string op(1, '/');
+  if (const char next = lx.peek(); next != '\0' && lx.is_multiop(op + next)) {
+    op.push_back(lx.advance());
+  }
+  return std::make_optional(TokenFactory::make_operator(std::move(op), start_line, lx.token_col()));
+}
diff --git a/lib/lexer/handlers/SlashHandler.hpp b/lib/lexer/handlers/SlashHandler.hpp
new file mode 100644
index 0000000..981f07c
--- /dev/null
+++ b/lib/lexer/handlers/SlashHandler.hpp
@@ -0,0 +1,12 @@
+#ifndef SLASHHANDLER_HPP_
+#define SLASHHANDLER_HPP_
+
+#include "Handler.hpp"
+#include "lib/lexer/Lexer.hpp"
+
+// Handler for '/': line comments, block comments and slash operators.
+struct SlashHandler : Handler {
+  OptToken scan(Lexer &lx) override;
+};
+
+#endif // SLASHHANDLER_HPP_
diff --git a/lib/lexer/handlers/StringHandler.cpp b/lib/lexer/handlers/StringHandler.cpp
new file mode 100644
index 0000000..c31621f
--- /dev/null
+++ b/lib/lexer/handlers/StringHandler.cpp
@@ -0,0 +1,55 @@
+#include "StringHandler.hpp"
+
+#include <string>
+#include <utility>
+
+#include "lib/lexer/LexerError.hpp"
+
+// Scans a double-quoted string literal; the opening quote has already been consumed.
+// `raw` keeps the source spelling (quotes and escapes included), `out` the decoded value.
+// Escapes: \n \t \r \\ \" (any other escaped character stands for itself).
+// Throws LexerError on a raw newline or on end of input before the closing quote.
+OptToken StringHandler::scan(Lexer &lx) {
+  std::string raw;
+  std::string out;
+  raw.push_back('"');
+  bool closed = false;
+  while (!lx.is_at_end()) {
+    const char c = lx.advance();
+    raw.push_back(c);
+    if (c == '"') {
+      closed = true;
+      break;
+    }
+    if (c == '\n')
+      throw LexerError("Unterminated string literal (newline inside)");
+    if (c != '\\') {
+      out.push_back(c);
+      continue;
+    }
+    // A backslash as the very last character leaves nothing to escape.
+    if (lx.is_at_end())
+      break;
+    const char e = lx.advance();
+    raw.push_back(e);
+    switch (e) {
+      case 'n':
+        out.push_back('\n');
+        break;
+      case 't':
+        out.push_back('\t');
+        break;
+      case 'r':
+        out.push_back('\r');
+        break;
+      default:  // \\ and \" (and unknown escapes) denote the escaped character itself
+        out.push_back(e);
+        break;
+    }
+  }
+  // Previously a literal cut off by end of input was returned as if it were complete.
+  if (!closed)
+    throw LexerError("Unterminated string literal");
+  return std::make_optional(
+      TokenFactory::make_string_literal(std::move(raw), std::move(out), lx.line(), lx.token_col()));
+}
diff --git a/lib/lexer/handlers/StringHandler.hpp b/lib/lexer/handlers/StringHandler.hpp
new file mode 100644
index 0000000..b3bd2e4 --- /dev/null +++ b/lib/lexer/handlers/StringHandler.hpp @@ -0,0 +1,11 @@ +#ifndef STRINGHANDLER_H +#define STRINGHANDLER_H + +#include "Handler.hpp" +#include "lib/lexer/Lexer.hpp" + +struct StringHandler : Handler { + OptToken scan(Lexer &lx) override; +}; + +#endif // STRINGHANDLER_H diff --git a/lib/lexer/handlers/WhitespaceHandler.cpp b/lib/lexer/handlers/WhitespaceHandler.cpp new file mode 100644 index 0000000..68c59fc --- /dev/null +++ b/lib/lexer/handlers/WhitespaceHandler.cpp @@ -0,0 +1,12 @@ +#include "WhitespaceHandler.hpp" + +OptToken WhitespaceHandler::scan(Lexer &lx) { + while (!lx.is_at_end()) { + if (char p = lx.peek(); p == ' ' || p == '\t' || p == '\r') { + lx.advance(); + continue; + } + break; + } + return std::nullopt; +} diff --git a/lib/lexer/handlers/WhitespaceHandler.hpp b/lib/lexer/handlers/WhitespaceHandler.hpp new file mode 100644 index 0000000..78a21d9 --- /dev/null +++ b/lib/lexer/handlers/WhitespaceHandler.hpp @@ -0,0 +1,11 @@ +#ifndef WHITESPACEHANDLER_HPP_ +#define WHITESPACEHANDLER_HPP_ + +#include "Handler.hpp" +#include "lib/lexer/Lexer.hpp" + +struct WhitespaceHandler : Handler { + OptToken scan(Lexer &lx) override; +}; + +#endif // WHITESPACEHANDLER_HPP_ diff --git a/lib/lexer/tokens/CommentToken.hpp b/lib/lexer/tokens/CommentToken.hpp new file mode 100644 index 0000000..eb1c457 --- /dev/null +++ b/lib/lexer/tokens/CommentToken.hpp @@ -0,0 +1,42 @@ +#ifndef COMMENTTOKEN_HPP_ +#define COMMENTTOKEN_HPP_ + +#include +#include +#include +#include + +#include "Token.hpp" +#include "TokenType.hpp" +#include "TokenVisitor.hpp" + +class CommentToken final : public Token { +public: + CommentToken(std::string txt, int line, int col) : Token(line, col), text_(std::move(txt)) { + } + + TokenType type() const noexcept override { + return TokenType::COMMENT; + } + std::string lexeme() const noexcept override { + return text_; + } + + std::unique_ptr clone() const override { + return std::make_unique(*this); + } + void 
accept(TokenVisitor& visitor) const override { + visitor.visit(*this); + } + + std::string to_string() const override { + std::ostringstream os; + os << "Token(COMMENT, '" << text_ << "', @" << this->line() << ":" << this->column() << ")"; + return os.str(); + } + +private: + std::string text_; +}; + +#endif // COMMENTTOKEN_HPP_ diff --git a/lib/lexer/tokens/EofToken.hpp b/lib/lexer/tokens/EofToken.hpp new file mode 100644 index 0000000..f7e154f --- /dev/null +++ b/lib/lexer/tokens/EofToken.hpp @@ -0,0 +1,42 @@ +#ifndef EOFTOKEN_HPP_ +#define EOFTOKEN_HPP_ + +#include +#include +#include +#include + +#include "Token.hpp" +#include "TokenType.hpp" +#include "TokenVisitor.hpp" + +class EofToken final : public Token { +public: + EofToken(int line, int col) : Token(line, col), lexeme_("") { + } + + TokenType type() const noexcept override { + return TokenType::EOF_T; + } + std::string lexeme() const noexcept override { + return lexeme_; + } + + std::unique_ptr clone() const override { + return std::make_unique(*this); + } + void accept(TokenVisitor& visitor) const override { + visitor.visit(*this); + } + + std::string to_string() const override { + std::ostringstream os; + os << "Token(EOF, @" << this->line() << ":" << this->column() << ")"; + return os.str(); + } + +private: + std::string lexeme_; +}; + +#endif // EOFTOKEN_HPP_ diff --git a/lib/lexer/tokens/IdentToken.hpp b/lib/lexer/tokens/IdentToken.hpp new file mode 100644 index 0000000..f2e3d9c --- /dev/null +++ b/lib/lexer/tokens/IdentToken.hpp @@ -0,0 +1,46 @@ +#ifndef IDENTTOKEN_HPP_ +#define IDENTTOKEN_HPP_ + +#include +#include +#include + +#include "Token.hpp" +#include "TokenVisitor.hpp" + +class IdentToken final : public Token { +public: + IdentToken(std::string lex, int line, int col) : Token(line, col), lexeme_(std::move(lex)) { + } + + TokenType type() const noexcept override { + return TokenType::IDENT; + } + std::string lexeme() const noexcept override { + return lexeme_; + } + + std::unique_ptr 
clone() const override { + return std::make_unique(*this); + } + + void accept(TokenVisitor& visitor) const override { + visitor.visit(*this); + } + + std::string to_string() const override { + std::ostringstream os; + os << "Token(IDENT, '" << lexeme_ << "', @"; + os << this->line() << ":" << this->column() << ")"; + return os.str(); + } + + const std::string& name() const noexcept { + return lexeme_; + } + +private: + std::string lexeme_; +}; + +#endif // IDENTTOKEN_HPP_ diff --git a/lib/lexer/tokens/KeywordToken.hpp b/lib/lexer/tokens/KeywordToken.hpp new file mode 100644 index 0000000..54f151d --- /dev/null +++ b/lib/lexer/tokens/KeywordToken.hpp @@ -0,0 +1,41 @@ +#ifndef KEYWORDTOKEN_HPP_ +#define KEYWORDTOKEN_HPP_ + +#include +#include +#include + +#include "Token.hpp" +#include "TokenVisitor.hpp" + +class KeywordToken final : public Token { +public: + KeywordToken(std::string lex, int line, int col) : Token(line, col), lexeme_(std::move(lex)) { + } + + TokenType type() const noexcept override { + return TokenType::KEYWORD; + } + std::string lexeme() const noexcept override { + return lexeme_; + } + + std::unique_ptr clone() const override { + return std::make_unique(*this); + } + void accept(TokenVisitor& visitor) const override { + visitor.visit(*this); + } + + std::string to_string() const override { + std::ostringstream os; + os << "Token(KEYWORD, '" << lexeme_ << "', @"; + os << this->line() << ":" << this->column() << ")"; + return os.str(); + } + +private: + std::string lexeme_; +}; + +#endif // KEYWORDTOKEN_HPP_ diff --git a/lib/lexer/tokens/LiteralToken.hpp b/lib/lexer/tokens/LiteralToken.hpp new file mode 100644 index 0000000..eb90dd2 --- /dev/null +++ b/lib/lexer/tokens/LiteralToken.hpp @@ -0,0 +1,55 @@ +#ifndef LITERALTOKEN_HPP_ +#define LITERALTOKEN_HPP_ + +#include +#include + +#include "Token.hpp" +#include "TokenType.hpp" +#include "lib/lexer/values/Value.hpp" + +class LiteralToken final : public Token { +public: + LiteralToken(TokenType typ, 
std::string rawLexeme, std::unique_ptr value, int line, int col) : + Token(line, col), typ_(typ), lexeme_(std::move(rawLexeme)), value_(std::move(value)) { + } + + TokenType type() const noexcept override { + return typ_; + } + std::string lexeme() const noexcept override { + return lexeme_; + } + + const Value* value() const noexcept { + return value_.get(); + } + Value* value() noexcept { + return value_.get(); + } + + std::unique_ptr clone() const override { + std::unique_ptr vcopy = value_ ? value_->clone() : nullptr; + return std::make_unique(typ_, lexeme_, std::move(vcopy), this->line(), this->column()); + } + + void accept(TokenVisitor& visitor) const override { + visitor.visit(*this); + } + + std::string to_string() const override { + std::ostringstream os; + os << "Token(" << to_string_view(typ_) << ", '" << lexeme_ << "'"; + if (value_) + os << ", " << value_->to_string(); + os << ", @" << this->line() << ":" << this->column() << ")"; + return os.str(); + } + +private: + TokenType typ_; + std::string lexeme_; + std::unique_ptr value_; +}; + +#endif // LITERALTOKEN_HPP_ diff --git a/lib/lexer/tokens/NewlineToken.hpp b/lib/lexer/tokens/NewlineToken.hpp new file mode 100644 index 0000000..5d514bd --- /dev/null +++ b/lib/lexer/tokens/NewlineToken.hpp @@ -0,0 +1,41 @@ +#ifndef NEWLINETOKEN_HPP_ +#define NEWLINETOKEN_HPP_ + +#include +#include +#include + +#include "Token.hpp" +#include "TokenType.hpp" +#include "TokenVisitor.hpp" + +class NewlineToken final : public Token { +public: + NewlineToken(int line, int col) : Token(line, col), lexeme_("\\n") { + } + + TokenType type() const noexcept override { + return TokenType::NEWLINE; + } + std::string lexeme() const noexcept override { + return lexeme_; + } + + std::unique_ptr clone() const override { + return std::make_unique(*this); + } + void accept(TokenVisitor& visitor) const override { + visitor.visit(*this); + } + + std::string to_string() const override { + std::ostringstream os; + os << "Token(NEWLINE, 
'\\n', @" << this->line() << ":" << this->column() << ")"; + return os.str(); + } + +private: + std::string lexeme_; +}; + +#endif // NEWLINETOKEN_HPP_ diff --git a/lib/lexer/tokens/OperatorToken.hpp b/lib/lexer/tokens/OperatorToken.hpp new file mode 100644 index 0000000..dd41c2d --- /dev/null +++ b/lib/lexer/tokens/OperatorToken.hpp @@ -0,0 +1,42 @@ +#ifndef OPERATORTOKEN_HPP_ +#define OPERATORTOKEN_HPP_ + +#include +#include +#include + +#include "Token.hpp" +#include "TokenType.hpp" + +struct TokenVisitor; +class OperatorToken final : public Token { +public: + OperatorToken(std::string op, int line, int col) : Token(line, col), lexeme_(std::move(op)) { + } + + TokenType type() const noexcept override { + return TokenType::OPERATOR; + } + std::string lexeme() const noexcept override { + return lexeme_; + } + + std::unique_ptr clone() const override { + return std::make_unique(*this); + } + void accept(TokenVisitor& visitor) const override { + visitor.visit(*this); + } + + std::string to_string() const override { + std::ostringstream os; + os << "Token(OPERATOR, '" << lexeme_ << "', @"; + os << this->line() << ":" << this->column() << ")"; + return os.str(); + } + +private: + std::string lexeme_; +}; + +#endif // OPERATORTOKEN_HPP_ diff --git a/lib/lexer/tokens/PunctToken.hpp b/lib/lexer/tokens/PunctToken.hpp new file mode 100644 index 0000000..fdb9b41 --- /dev/null +++ b/lib/lexer/tokens/PunctToken.hpp @@ -0,0 +1,44 @@ +#ifndef PUNCTTOKEN_HPP_ +#define PUNCTTOKEN_HPP_ +#include +#include +#include + +#include "Token.hpp" +#include "TokenType.hpp" +#include "TokenVisitor.hpp" + +class PunctToken final : public Token { +public: + PunctToken(char ch, int line, int col) : Token(line, col), lexeme_(1, ch) { + } + + PunctToken(std::string punct, int line, int col) : Token(line, col), lexeme_(std::move(punct)) { + } + + TokenType type() const noexcept override { + return TokenType::PUNCT; + } + std::string lexeme() const noexcept override { + return lexeme_; + } + + 
std::unique_ptr clone() const override { + return std::make_unique(*this); + } + void accept(TokenVisitor& visitor) const override { + visitor.visit(*this); + } + + std::string to_string() const override { + std::ostringstream os; + os << "Token(PUNCT, '" << lexeme_ << "', @"; + os << this->line() << ":" << this->column() << ")"; + return os.str(); + } + +private: + std::string lexeme_; +}; + +#endif // PUNCTTOKEN_HPP_ diff --git a/lib/lexer/tokens/Token.hpp b/lib/lexer/tokens/Token.hpp new file mode 100644 index 0000000..a47b3ba --- /dev/null +++ b/lib/lexer/tokens/Token.hpp @@ -0,0 +1,37 @@ +#ifndef TOKEN_HPP_ +#define TOKEN_HPP_ + +#include +#include +#include + +#include "TokenType.hpp" +#include "TokenVisitor.hpp" + +class Token { +public: + Token(int line, int column) : line_(line), column_(column) { + } + virtual ~Token() = default; + virtual TokenType type() const = 0; + virtual std::string lexeme() const = 0; + virtual std::unique_ptr clone() const = 0; + virtual void accept(TokenVisitor& v) const = 0; + virtual std::string to_string() const { + std::ostringstream os; + os << "Token(" << to_string_view(type()) << ", '" << lexeme() << "', @" << line_ << ":" << column_ << ")"; + return os.str(); + } + int line() const noexcept { + return line_; + } + int column() const noexcept { + return column_; + } + +private: + int line_; + int column_; +}; + +#endif // TOKEN_HPP_ diff --git a/lib/lexer/tokens/TokenFactory.hpp b/lib/lexer/tokens/TokenFactory.hpp new file mode 100644 index 0000000..a226dbd --- /dev/null +++ b/lib/lexer/tokens/TokenFactory.hpp @@ -0,0 +1,67 @@ +#ifndef TOKENFACTORY_HPP_ +#define TOKENFACTORY_HPP_ + +#include +#include +#include + +#include "CommentToken.hpp" +#include "EofToken.hpp" +#include "IdentToken.hpp" +#include "KeywordToken.hpp" +#include "LiteralToken.hpp" +#include "NewlineToken.hpp" +#include "OperatorToken.hpp" +#include "PunctToken.hpp" +#include "Token.hpp" +#include "lib/lexer/values/BoolValue.hpp" +#include 
"lib/lexer/values/CharValue.hpp" +#include "lib/lexer/values/FloatValue.hpp" +#include "lib/lexer/values/IntValue.hpp" +#include "lib/lexer/values/StringValue.hpp" + +struct TokenFactory { + static std::unique_ptr make_ident(std::string lex, int line, int col) { + return std::make_unique(std::move(lex), line, col); + } + static std::unique_ptr make_keyword(std::string lex, int line, int col) { + return std::make_unique(std::move(lex), line, col); + } + static std::unique_ptr make_operator(std::string lex, int line, int col) { + return std::make_unique(std::move(lex), line, col); + } + static std::unique_ptr make_punct(char ch, int line, int col) { + return std::make_unique(ch, line, col); + } + static std::unique_ptr make_punct(std::string lex, int line, int col) { + return std::make_unique(std::move(lex), line, col); + } + static std::unique_ptr make_newline(int line, int col) { + return std::make_unique(line, col); + } + static std::unique_ptr make_comment(std::string text, int line, int col) { + return std::make_unique(std::move(text), line, col); + } + static std::unique_ptr make_eof(int line, int col) { + return std::make_unique(line, col); + } + + static std::unique_ptr make_int_literal(std::string raw, int64_t v, int line, int col) { + return std::make_unique(TokenType::INT, std::move(raw), std::make_unique(v), line, col); + } + static std::unique_ptr make_float_literal(std::string raw, long double v, int line, int col) { + return std::make_unique(TokenType::FLOAT, std::move(raw), std::make_unique(v), line, col); + } + static std::unique_ptr make_string_literal(std::string raw, std::string s, int line, int col) { + return std::make_unique( + TokenType::STRING, std::move(raw), std::make_unique(std::move(s)), line, col); + } + static std::unique_ptr make_char_literal(std::string raw, char c, int line, int col) { + return std::make_unique(TokenType::CHAR, std::move(raw), std::make_unique(c), line, col); + } + static std::unique_ptr make_bool_literal(std::string 
raw, bool b, int line, int col) { + return std::make_unique(TokenType::BOOL, std::move(raw), std::make_unique(b), line, col); + } +}; + +#endif // TOKENFACTORY_HPP_ diff --git a/lib/lexer/tokens/TokenType.hpp b/lib/lexer/tokens/TokenType.hpp new file mode 100644 index 0000000..f06098a --- /dev/null +++ b/lib/lexer/tokens/TokenType.hpp @@ -0,0 +1,43 @@ +#ifndef TOKENTYPE_HPP_ +#define TOKENTYPE_HPP_ + +#include + +enum class TokenType { + IDENT, KEYWORD, INT, FLOAT, + STRING, CHAR, BOOL, OPERATOR, + PUNCT, NEWLINE, COMMENT, EOF_T +}; + +inline std::string_view to_string_view(TokenType token_type) { + switch (token_type) { + case TokenType::IDENT: + return "IDENT"; + case TokenType::KEYWORD: + return "KEYWORD"; + case TokenType::INT: + return "INT"; + case TokenType::FLOAT: + return "FLOAT"; + case TokenType::STRING: + return "STRING"; + case TokenType::CHAR: + return "CHAR"; + case TokenType::BOOL: + return "BOOL"; + case TokenType::OPERATOR: + return "OPERATOR"; + case TokenType::PUNCT: + return "PUNCT"; + case TokenType::NEWLINE: + return "NEWLINE"; + case TokenType::COMMENT: + return "COMMENT"; + case TokenType::EOF_T: + return "EOF"; + default: + return "UNKNOWN"; + } +} + +#endif // TOKENTYPE_HPP_ diff --git a/lib/lexer/tokens/TokenVisitor.hpp b/lib/lexer/tokens/TokenVisitor.hpp new file mode 100644 index 0000000..9069afa --- /dev/null +++ b/lib/lexer/tokens/TokenVisitor.hpp @@ -0,0 +1,25 @@ +#ifndef TOKENVISITOR_HPP_ +#define TOKENVISITOR_HPP_ + +class EofToken; +class CommentToken; +class NewlineToken; +class PunctToken; +class OperatorToken; +class LiteralToken; +class KeywordToken; +class IdentToken; + +struct TokenVisitor { + virtual ~TokenVisitor() = default; + virtual void visit(const IdentToken& t) = 0; + virtual void visit(const KeywordToken& t) = 0; + virtual void visit(const LiteralToken& t) = 0; + virtual void visit(const OperatorToken& t) = 0; + virtual void visit(const PunctToken& t) = 0; + virtual void visit(const NewlineToken& t) = 0; + virtual 
void visit(const CommentToken& t) = 0; + virtual void visit(const EofToken& t) = 0; +}; + +#endif // TOKENVISITOR_HPP_ diff --git a/lib/lexer/utils.cpp b/lib/lexer/utils.cpp new file mode 100644 index 0000000..1665614 --- /dev/null +++ b/lib/lexer/utils.cpp @@ -0,0 +1,15 @@ +#include "utils.hpp" + +const std::unordered_set& keyword_set() { + static const std::unordered_set s = { + "fun", "class", "interface", "var", "override", "pure", "if", "else", + "for", "while", "return", "unsafe", "val", "static", "public", "private", + "implements", "as", "is", "null", "true", "false", "typealias", "destructor", + "call", "#import", "#define", "#undef", "#ifdef", "#ifndef", "#else", "#endif"}; + return s; +} + +const std::unordered_set& multi_ops_set() { + static const std::unordered_set s = {"==", "!=", "<=", ">=", "&&", "||", "?:", "?.", "::", ":="}; + return s; +} diff --git a/lib/lexer/utils.hpp b/lib/lexer/utils.hpp new file mode 100644 index 0000000..d250894 --- /dev/null +++ b/lib/lexer/utils.hpp @@ -0,0 +1,10 @@ +#ifndef UTILS_HPP_ +#define UTILS_HPP_ + +#include +#include + +const std::unordered_set& keyword_set(); +const std::unordered_set& multi_ops_set(); + +#endif // UTILS_HPP_ diff --git a/lib/lexer/values/BoolValue.hpp b/lib/lexer/values/BoolValue.hpp new file mode 100644 index 0000000..d9af4cb --- /dev/null +++ b/lib/lexer/values/BoolValue.hpp @@ -0,0 +1,24 @@ +#ifndef BOOLVALUE_HPP_ +#define BOOLVALUE_HPP_ + +#include +#include + +#include "Value.hpp" + +struct BoolValue : Value { + bool v; + explicit BoolValue(bool b) : v(b) { + } + std::unique_ptr clone() const override { + return std::make_unique(v); + } + std::string to_string() const override { + return v ? 
"true" : "false"; + } + std::string type_name() const override { + return "Bool"; + } +}; + +#endif // BOOLVALUE_HPP_ diff --git a/lib/lexer/values/CharValue.hpp b/lib/lexer/values/CharValue.hpp new file mode 100644 index 0000000..1a80bdd --- /dev/null +++ b/lib/lexer/values/CharValue.hpp @@ -0,0 +1,24 @@ +#ifndef CHARVALUE_HPP_ +#define CHARVALUE_HPP_ + +#include +#include + +#include "Value.hpp" + +struct CharValue : Value { + char v; + explicit CharValue(char c) : v(c) { + } + std::unique_ptr clone() const override { + return std::make_unique(v); + } + std::string to_string() const override { + return std::string("'") + v + "'"; + } + std::string type_name() const override { + return "Char"; + } +}; + +#endif // CHARVALUE_HPP_ diff --git a/lib/lexer/values/FloatValue.hpp b/lib/lexer/values/FloatValue.hpp new file mode 100644 index 0000000..b7804b5 --- /dev/null +++ b/lib/lexer/values/FloatValue.hpp @@ -0,0 +1,27 @@ +#ifndef FLOATVALUE_HPP_ +#define FLOATVALUE_HPP_ + +#include +#include +#include + +#include "Value.hpp" + +struct FloatValue : Value { + long double v; + explicit FloatValue(long double x) : v(x) { + } + std::unique_ptr clone() const override { + return std::make_unique(v); + } + std::string to_string() const override { + std::ostringstream os; + os << static_cast(v); + return os.str(); + } + std::string type_name() const override { + return "Float"; + } +}; + +#endif // FLOATVALUE_HPP_ diff --git a/lib/lexer/values/IntValue.hpp b/lib/lexer/values/IntValue.hpp new file mode 100644 index 0000000..cc86b37 --- /dev/null +++ b/lib/lexer/values/IntValue.hpp @@ -0,0 +1,23 @@ +#ifndef INTVALUE_HPP_ +#define INTVALUE_HPP_ + +#include + +#include "Value.hpp" + +struct IntValue : Value { + int64_t v; + explicit IntValue(int64_t x) : v(x) { + } + std::unique_ptr clone() const override { + return std::make_unique(v); + } + std::string to_string() const override { + return std::to_string(v); + } + std::string type_name() const override { + return "Int"; + } +}; 
+ +#endif // INTVALUE_HPP_ diff --git a/lib/lexer/values/StringValue.hpp b/lib/lexer/values/StringValue.hpp new file mode 100644 index 0000000..56cdfe9 --- /dev/null +++ b/lib/lexer/values/StringValue.hpp @@ -0,0 +1,23 @@ +#ifndef STRINGVALUE_HPP_ +#define STRINGVALUE_HPP_ + +#include + +#include "Value.hpp" + +struct StringValue : Value { + std::string v; + explicit StringValue(std::string s) : v(std::move(s)) { + } + std::unique_ptr clone() const override { + return std::make_unique(v); + } + std::string to_string() const override { + return std::string("\"") + v + "\""; + } + std::string type_name() const override { + return "String"; + } +}; + +#endif // STRINGVALUE_HPP_ diff --git a/lib/lexer/values/Value.hpp b/lib/lexer/values/Value.hpp new file mode 100644 index 0000000..4f33efa --- /dev/null +++ b/lib/lexer/values/Value.hpp @@ -0,0 +1,14 @@ +#ifndef VALUE_HPP_ +#define VALUE_HPP_ + +#include +#include + +struct Value { + virtual ~Value() = default; + virtual std::unique_ptr clone() const = 0; + virtual std::string to_string() const = 0; + virtual std::string type_name() const = 0; +}; + +#endif // VALUE_HPP_ From a4fcd21b187fee0d60fef86c955c686c190143ac Mon Sep 17 00:00:00 2001 From: biqiboqi Date: Mon, 13 Oct 2025 01:43:15 +0300 Subject: [PATCH 2/2] fix: fixed clang formating in enum --- lib/lexer/tokens/TokenType.hpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/lib/lexer/tokens/TokenType.hpp b/lib/lexer/tokens/TokenType.hpp index f06098a..3a61c85 100644 --- a/lib/lexer/tokens/TokenType.hpp +++ b/lib/lexer/tokens/TokenType.hpp @@ -3,11 +3,7 @@ #include -enum class TokenType { - IDENT, KEYWORD, INT, FLOAT, - STRING, CHAR, BOOL, OPERATOR, - PUNCT, NEWLINE, COMMENT, EOF_T -}; +enum class TokenType { IDENT, KEYWORD, INT, FLOAT, STRING, CHAR, BOOL, OPERATOR, PUNCT, NEWLINE, COMMENT, EOF_T }; inline std::string_view to_string_view(TokenType token_type) { switch (token_type) {