diff --git a/conanfile.py b/conanfile.py index 2a01b519a..3ade456e5 100644 --- a/conanfile.py +++ b/conanfile.py @@ -27,6 +27,7 @@ def requirements(self): self.requires("fmt/10.2.1", force=True) self.requires("spdlog/1.15.0") self.requires("uvw/3.4.0") + self.requires("libnghttp2/1.61.0") self.requires("yaml-cpp/0.8.0") self.requires("robin-hood-hashing/3.11.5") self.requires("libcurl/8.11.1") diff --git a/libs/CMakeLists.txt b/libs/CMakeLists.txt index e6480f56e..fadb19e11 100644 --- a/libs/CMakeLists.txt +++ b/libs/CMakeLists.txt @@ -4,4 +4,5 @@ add_subdirectory(visor_test) add_subdirectory(visor_transaction) add_subdirectory(visor_tcp) add_subdirectory(visor_dns) -add_subdirectory(visor_utils) \ No newline at end of file +add_subdirectory(visor_utils) +add_subdirectory(visor_http_client) diff --git a/libs/visor_http_client/CMakeLists.txt b/libs/visor_http_client/CMakeLists.txt new file mode 100644 index 000000000..f6f2aa359 --- /dev/null +++ b/libs/visor_http_client/CMakeLists.txt @@ -0,0 +1,38 @@ +message(STATUS "Visor Lib HTTP Client") + +add_library(VisorLibHttpClient + httpssession.cpp + tcpsession.cpp + url_parser.c + ) +add_library(Visor::Lib::HttpClient ALIAS VisorLibHttpClient) + +target_include_directories(VisorLibHttpClient + PUBLIC + $ + ${CONAN_LIBS_LIBNGHTTP2} + ) + +find_package(libnghttp2 REQUIRED) +find_package(uvw REQUIRED) +find_package(OpenSSL REQUIRED) + +target_link_libraries(VisorLibHttpClient + PUBLIC + ${CONAN_LIBS_OPENSSL} + OpenSSL::SSL + uvw::uvw + libnghttp2::libnghttp2 + ) + +## TEST SUITE +add_executable(unit-tests-visor-http-client test_driver.cpp) + +target_link_libraries(unit-tests-visor-http-client + PRIVATE + Visor::Lib::HttpClient + ${CONAN_LIBS_CATCH2}) + +add_test(NAME unit-tests-visor-http-client + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/libs/visor_http_client + COMMAND unit-tests-visor-http-client) \ No newline at end of file diff --git a/libs/visor_http_client/LICENSE.txt b/libs/visor_http_client/LICENSE.txt new file mode 100644 index 000000000..f49a4e16e --- /dev/null +++ b/libs/visor_http_client/LICENSE.txt @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/libs/visor_http_client/base64.cpp b/libs/visor_http_client/base64.cpp new file mode 100644 index 000000000..6eb8b1cc6 --- /dev/null +++ b/libs/visor_http_client/base64.cpp @@ -0,0 +1,120 @@ +/* + base64.cpp and base64.h + + base64 encoding and decoding with C++. + + Version: 1.01.00 + + Copyright (C) 2004-2017 René Nyffenegger + ALTERED by Jeroen Wijenbergh to allow for encoding of URLs + + This source code is provided 'as-is', without any express or implied + warranty. In no event will the author be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this source code must not be misrepresented; you must not + claim that you wrote the original source code. If you use this source code + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original source code. + + 3. This notice may not be removed or altered from any source distribution. + + René Nyffenegger rene.nyffenegger@adp-gmbh.ch + +*/ + +#include "base64.h" +#include + +static const std::string base64_chars = + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789-_"; + + +static inline bool is_base64(unsigned char c) { + return (isalnum(c) || (c == '-') || (c == '_')); +} + +std::string base64_encode(unsigned char const* bytes_to_encode, unsigned int in_len) { + std::string ret; + int i = 0; + int j = 0; + unsigned char char_array_3[3]; + unsigned char char_array_4[4]; + + while (in_len--) { + char_array_3[i++] = *(bytes_to_encode++); + if (i == 3) { + char_array_4[0] = (char_array_3[0] & 0xfc) >> 2; + char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4); + char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6); + char_array_4[3] = char_array_3[2] & 0x3f; + + for(i = 0; (i <4) ; i++) + ret += base64_chars[char_array_4[i]]; + i = 0; + } + } + + if (i) + { + for(j = i; j < 3; j++) + char_array_3[j] = '\0'; + + char_array_4[0] = ( char_array_3[0] & 0xfc) >> 2; + char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4); + char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6); + + for (j = 0; (j < i + 1); j++) + ret += base64_chars[char_array_4[j]]; + + } + + return ret; + +} + +std::string base64_decode(std::string const& encoded_string) { + size_t in_len = encoded_string.size(); + int i = 0; + int j = 0; + int in_ = 0; + unsigned char char_array_4[4], char_array_3[3]; + std::string ret; + + while (in_len-- && is_base64(encoded_string[in_])) { + char_array_4[i++] = encoded_string[in_]; in_++; + if (i ==4) { + for (i = 0; i <4; i++) + char_array_4[i] = base64_chars.find(char_array_4[i]) & 0xff; + + char_array_3[0] = ( char_array_4[0] << 2 ) + ((char_array_4[1] & 0x30) >> 4); + char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2); + char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3]; + + for (i = 0; (i < 3); i++) + ret += char_array_3[i]; + i = 0; + } + } + + if (i) { + for (j = 0; j < i; j++) + char_array_4[j] = base64_chars.find(char_array_4[j]) & 0xff; + + char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4); + char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2); + + for (j = 0; (j < i - 1); j++) ret += char_array_3[j]; + } + + return ret; +} diff --git a/libs/visor_http_client/base64.h b/libs/visor_http_client/base64.h new file mode 100644 index 000000000..14db18499 --- /dev/null +++ b/libs/visor_http_client/base64.h @@ -0,0 +1,14 @@ +// +// base64 encoding and decoding with C++. +// Version: 1.01.00 +// + +#ifndef BASE64_H_C0CE2A47_D10E_42C9_A27C_C883944E704A +#define BASE64_H_C0CE2A47_D10E_42C9_A27C_C883944E704A + +#include + +std::string base64_encode(unsigned char const* , unsigned int len); +std::string base64_decode(std::string const& s); + +#endif /* BASE64_H_C0CE2A47_D10E_42C9_A27C_C883944E704A */ \ No newline at end of file diff --git a/libs/visor_http_client/httpssession.cpp b/libs/visor_http_client/httpssession.cpp new file mode 100644 index 000000000..9433e3f79 --- /dev/null +++ b/libs/visor_http_client/httpssession.cpp @@ -0,0 +1,414 @@ +#include +#include +#include +#include + +#include "httpssession.h" + +HTTPSSession::HTTPSSession(std::shared_ptr handle, + TCPSession::malformed_data_cb malformed_data_handler, + TCPSession::got_dns_msg_cb got_dns_msg_handler, + TCPSession::connection_ready_cb connection_ready_handler, + handshake_error_cb handshake_error_handler, + Target target, + HTTPMethod method) + : TCPSession(handle, malformed_data_handler, got_dns_msg_handler, connection_ready_handler) + , http2_state{STATE_HTTP2::WAIT_SETTINGS} + , _malformed_data{malformed_data_handler} + , _got_dns_msg{got_dns_msg_handler} + , _handle{handle} + , _tls_state{LinkState::HANDSHAKE} + , _handshake_error{handshake_error_handler} + , _target{std::move(target)} + , _method{method} + , _current_session{nullptr} +{ +} + +HTTPSSession::~HTTPSSession() +{ + destroy_session(); +} + +std::unique_ptr HTTPSSession::create_http2_stream_data(std::unique_ptr data, size_t len) +{ + std::string uri = _target.uri; + struct http_parser_url *u = _target.parsed; + std::string scheme(&uri[u->field_data[UF_SCHEMA].off], u->field_data[UF_SCHEMA].len); + std::string authority(&uri[u->field_data[UF_HOST].off], u->field_data[UF_HOST].len); + std::string path(&uri[u->field_data[UF_PATH].off], u->field_data[UF_PATH].len); + int32_t stream_id = -1; + if (_method == HTTPMethod::GET) { + path.append("?dns="); + path.append(data.get(), len); + } + std::string streamData(data.get(), len); + auto root = std::make_unique(scheme, authority, path, stream_id, streamData); + return root; +} +#define ARRLEN(x) (sizeof(x) / sizeof(x[0])) + +static ssize_t ng2_send_callback([[maybe_unused]] nghttp2_session *session, const uint8_t *data, + size_t length, [[maybe_unused]] int flags, void *user_data) +{ + auto class_session = static_cast(user_data); + class_session->send_tls((void *)data, length); + return (ssize_t)length; +} + +void HTTPSSession::destroy_session() +{ + // Free the SSL session + if (_ssl_session) { + SSL_free(_ssl_session); + _ssl_session = nullptr; + } + // Free the SSL context + if (_ssl_context) { + SSL_CTX_free(_ssl_context); + _ssl_context = nullptr; + } + // Clean up nghttp2 session + nghttp2_session_del(_current_session); +} + +void HTTPSSession::process_receive(const uint8_t *data, size_t len) +{ + auto buf = std::make_unique(len); + memcpy(buf.get(), (const char *)data, len); + _got_dns_msg(std::move(buf), len); +} + +static int ng2_on_data_chunk_recv_callback(nghttp2_session *session, [[maybe_unused]] uint8_t flags, + int32_t stream_id, const uint8_t *data, + size_t len, void *user_data) +{ + auto class_session = static_cast(user_data); + auto req = nghttp2_session_get_stream_user_data(session, stream_id); + if (!req) { + std::cerr << "No stream data on data chunk" << std::endl; + return 0; + } + auto existing = class_session->_recv_chunks.find(stream_id); + if (existing != class_session->_recv_chunks.end()) { + class_session->_recv_chunks[stream_id].insert(class_session->_recv_chunks[stream_id].end(), data, data + len); + } else { + class_session->_recv_chunks[stream_id] = std::vector(data, data + len); + } + return 0; +} + +static int ng2_on_stream_close_callback(nghttp2_session *session, int32_t stream_id, [[maybe_unused]] uint32_t error_code, + [[maybe_unused]] void *user_data) +{ + auto stream_data = static_cast(nghttp2_session_get_stream_user_data(session, stream_id)); + if (!stream_data) { + std::cerr << "No stream data on stream close" << std::endl; + return 0; + } + nghttp2_session_terminate_session(session, NGHTTP2_NO_ERROR); + return 0; +} + +int ng2_on_frame_recv_callback([[maybe_unused]] nghttp2_session *session, + const nghttp2_frame *frame, void *user_data) +{ + auto class_session = static_cast(user_data); + switch (frame->hd.type) { + case NGHTTP2_SETTINGS: + class_session->settings_received(); + break; + case NGHTTP2_DATA: + if (frame->hd.flags & NGHTTP2_FLAG_END_STREAM) { + auto data = class_session->_recv_chunks[frame->data.hd.stream_id]; + class_session->process_receive(data.data(), data.size()); + } + } + return 0; +} + +void HTTPSSession::init_nghttp2() +{ + nghttp2_session_callbacks *callbacks; + nghttp2_session_callbacks_new(&callbacks); + nghttp2_session_callbacks_set_send_callback(callbacks, ng2_send_callback); + nghttp2_session_callbacks_set_on_data_chunk_recv_callback(callbacks, ng2_on_data_chunk_recv_callback); + nghttp2_session_callbacks_set_on_stream_close_callback(callbacks, ng2_on_stream_close_callback); + nghttp2_session_callbacks_set_on_frame_recv_callback(callbacks, ng2_on_frame_recv_callback); + nghttp2_session_client_new(&_current_session, callbacks, this); + nghttp2_session_callbacks_del(callbacks); +} +#define WHERE_INFO(ssl, w, flag, msg) { \ + if(w & flag) { \ + printf("\t"); \ + printf(msg); \ + printf(" - %s ", SSL_state_string(ssl)); \ + printf(" - %s ", SSL_state_string_long(ssl)); \ + printf("\n"); \ + }\ + } + +// INFO CALLBACK +void dummy_ssl_info_callback(const SSL* ssl, int where, int ret) { + if(ret == 0) { + printf("dummy_ssl_info_callback, error occured.\n"); + return; + } + WHERE_INFO(ssl, where, SSL_CB_LOOP, "LOOP"); + WHERE_INFO(ssl, where, SSL_CB_EXIT, "EXIT"); + WHERE_INFO(ssl, where, SSL_CB_READ, "READ"); + WHERE_INFO(ssl, where, SSL_CB_WRITE, "WRITE"); + WHERE_INFO(ssl, where, SSL_CB_ALERT, "ALERT"); + WHERE_INFO(ssl, where, SSL_CB_HANDSHAKE_DONE, "HANDSHAKE DONE"); +} +void dummy_ssl_msg_callback( + int writep + ,int version + ,int contentType + ,const void* buf + ,size_t len + ,SSL* ssl + ,void *arg +) +{ + printf("\tMessage callback with length: %zu\n", len); +} + +bool HTTPSSession::setup() +{ + // Initialize OpenSSL + SSL_library_init(); + OpenSSL_add_all_algorithms(); + SSL_load_error_strings(); + + // Create a new SSL_CTX object as a framework for TLS/SSL enabled functions + _ssl_context = SSL_CTX_new(TLS_client_method()); + if (!_ssl_context) { + std::cerr << "OpenSSL failed to create SSL_CTX object." << std::endl; + return false; + } + + // Load the system's default certificates for verification purposes + if (!SSL_CTX_set_default_verify_paths(_ssl_context)) { + std::cerr << "OpenSSL failed to set default verify paths." << std::endl; + return false; + } + + const unsigned char alpn_protos[] = {2, 'h', '2'}; // 2 is the length of 'h2' + if (SSL_CTX_set_alpn_protos(_ssl_context, alpn_protos, sizeof(alpn_protos))) { + std::cerr << "OpenSSL failed to set ALPN." << std::endl; + return false; + } + + // Create SSL session + _ssl_session = SSL_new(_ssl_context); + if (!_ssl_session) { + std::cerr << "OpenSSL failed to create SSL session." << std::endl; + return false; + } + SSL_CTX_set_info_callback(_ssl_context, dummy_ssl_info_callback); + SSL_CTX_set_msg_callback(_ssl_context, dummy_ssl_msg_callback); + _read_bio = BIO_new(BIO_s_mem()); + _write_bio = BIO_new(BIO_s_mem()); + SSL_set_bio(_ssl_session, _read_bio, _write_bio); + SSL_set_connect_state(_ssl_session); + return true; +} +void HTTPSSession::send_settings() +{ + nghttp2_settings_entry settings[1] = {{NGHTTP2_SETTINGS_MAX_CONCURRENT_STREAMS, (1U << 31) - 1}}; + int val; + val = nghttp2_submit_settings(_current_session, NGHTTP2_FLAG_NONE, settings, ARRLEN(settings)); + if (val != 0) { + std::cerr << "Could not submit SETTINGS frame: " << nghttp2_strerror(val) << std::endl; + } +} + +void HTTPSSession::settings_received() +{ + if (http2_state == STATE_HTTP2::WAIT_SETTINGS) { + TCPSession::on_connect_event(); + http2_state = STATE_HTTP2::SENDING_DATA; + } +} + +void HTTPSSession::receive_response(const char data[], size_t len) +{ + ssize_t stream_id = nghttp2_session_mem_recv(_current_session, (const uint8_t *)data, len); + if (stream_id < 0) { + std::cerr << "Could not get HTTP2 request: " << nghttp2_strerror(stream_id); + close(); + return; + } +} + +int HTTPSSession::session_send() +{ + int rv; + rv = nghttp2_session_send(_current_session); + if (rv != 0) { + std::cerr << "HTTP2 fatal error: " << nghttp2_strerror(rv); + return -1; + } + return 0; +} + +void HTTPSSession::on_connect_event() +{ + _current_session = {}; + do_handshake(); +} + +void HTTPSSession::close() +{ + _tls_state = LinkState::CLOSE; + // Shutdown the SSL/TLS session gracefully + SSL_shutdown(_ssl_session); + // Free up the SSL session + SSL_free(_ssl_session); + _ssl_session = nullptr; + TCPSession::close(); +} + +static ssize_t post_data(nghttp2_session *session, int32_t stream_id, uint8_t *buf, size_t length, + uint32_t *data_flags, [[maybe_unused]] nghttp2_data_source *source, [[maybe_unused]] void *user_data) +{ + auto stream_data = static_cast(nghttp2_session_get_stream_user_data(session, stream_id)); + size_t nread = std::min(stream_data->data.size(), length); + memcpy(buf, stream_data->data.c_str(), nread); + *data_flags = NGHTTP2_DATA_FLAG_EOF; + return nread; +} + +#define HDR_S(NAME, VALUE) \ + { \ + (uint8_t *)NAME, (uint8_t *)VALUE.c_str(), sizeof(NAME) - 1, VALUE.size(), \ + NGHTTP2_NV_FLAG_NONE \ + } + +void HTTPSSession::write(std::unique_ptr data, size_t len) +{ + int32_t stream_id; + auto stream_data = create_http2_stream_data(std::move(data), len); + nghttp2_data_provider provider = {}; + + std::string method = _method == HTTPMethod::GET ? "GET" : "POST"; + std::string content = "application/dns-message"; + std::vector hdrs{ + HDR_S(":method", method), + HDR_S(":scheme", stream_data->scheme), + HDR_S(":authority", stream_data->authority), + HDR_S(":path", stream_data->path), + HDR_S("accept", content)}; + if (_method == HTTPMethod::POST) { + hdrs.push_back(HDR_S("content-type", content)); + hdrs.push_back(HDR_S("content-length", std::to_string(len))); + provider.read_callback = post_data; + } + + stream_id = nghttp2_submit_request(_current_session, NULL, hdrs.data(), hdrs.size(), &provider, stream_data.get()); + if (stream_id < 0) { + std::cerr << "Could not submit HTTP request: " << nghttp2_strerror(stream_id); + } + + stream_data->id = stream_id; + + if (session_send() != 0) { + std::cerr << "HTTP2 failed to send" << std::endl; + } +} +void HTTPSSession::flush_read_bio() { + char buf[1024*16]; + int bytes_read = 0; + while((bytes_read = BIO_read(_write_bio, buf, sizeof(buf))) > 0) { + std::cerr << "flush_read_bio: " << buf << std::endl; + // WRITE TO STREAM + _handle->write(buf, bytes_read); + } +} +void HTTPSSession::receive_data(const char data[], size_t _len) +{ + std::cerr << "receive_data: " << data << std::endl; + int written = BIO_write(_read_bio, data, _len); +// _pull_buffer.append(data, _len); + switch (_tls_state) { + case LinkState::HANDSHAKE: + do_handshake(); + break; + case LinkState::DATA: + char buf[16384]; + for (;;) { + int len = SSL_read(_ssl_session, buf, sizeof(buf)); + if (len > 0) { + receive_response(buf, len); + } else { + int error = SSL_get_error(_ssl_session, len); + if (error == SSL_ERROR_WANT_READ) { + // OpenSSL wants to read more data. Check if we don't have any data left to read. +// if (_pull_buffer.empty()) { +// break; +// } + continue; + } else if (error == SSL_ERROR_WANT_WRITE) { + // OpenSSL wants to write data (e.g., for renegotiation). Continue processing. + continue; + } else { + // Some other error occurred. Handle as necessary. + std::cerr << "OpenSSL error while reading data: " << ERR_reason_error_string(error) << std::endl; + break; + } + } + } + break; + case LinkState::CLOSE: + break; + } +} + +void HTTPSSession::send_tls(void *data, size_t len) +{ + int sent = SSL_write(_ssl_session, data, len); + if (sent <= 0) { + int error = SSL_get_error(_ssl_session, sent); + std::cerr << "OpenSSL error while sending data: " << ERR_reason_error_string(error) << std::endl; + } +} + +void HTTPSSession::do_handshake() +{ + int err = SSL_connect(_ssl_session); // client-side + if (err == 1) { // Successful handshake +// const unsigned char *alpn = NULL; +// unsigned int alpnlen = 0; +// SSL_get0_alpn_selected(_ssl_session, &alpn, &alpnlen); +// if (!alpn || alpnlen != 2 || memcmp(alpn, "h2", 2) != 0) { +// std::cerr << "Cannot get ALPN or ALPN is not 'h2'." << std::endl; +// close(); +// return; +// } + init_nghttp2(); + send_settings(); + if (session_send() != 0) { + std::cerr << "Cannot submit settings frame" << std::endl; + } + _tls_state = LinkState::DATA; + } else { + int error = SSL_get_error(_ssl_session, err); + if (error == SSL_ERROR_SSL) { + std::cerr << "Handshake failed: SSL error" << std::endl; + ERR_print_errors_fp(stderr); + _handshake_error(); + } else if (error == SSL_ERROR_SYSCALL) { + std::cerr << "Handshake failed: syscall error" << std::endl; + ERR_print_errors_fp(stderr); + _handshake_error(); + } else if (error == SSL_ERROR_WANT_READ) { + std::cout << "Handshake needs READ" << std::endl; + flush_read_bio(); + } else if (error == SSL_ERROR_WANT_WRITE) { + std::cout << "Handshake needs WRITE" << std::endl; + } else { + std::cerr << "Unknown handshake error." << std::endl; + } + } +} \ No newline at end of file diff --git a/libs/visor_http_client/httpssession.h b/libs/visor_http_client/httpssession.h new file mode 100644 index 000000000..0d0e2a116 --- /dev/null +++ b/libs/visor_http_client/httpssession.h @@ -0,0 +1,129 @@ +#pragma once + +#include +#include + +using ssize_t = std::make_signed_t; //Windows fix required +#include + +#include +#include + +#include "base64.h" +#include "tcpsession.h" +#include "url_parser.h" + +struct Target { + http_parser_url *parsed; + std::string address; + std::string uri; + std::string port; +}; + +enum class HTTPMethod { + POST, + GET, +}; + +struct http2_stream_data { + http2_stream_data(std::string _scheme, std::string _authority, std::string _path, int32_t _id, std::string _data) + : scheme(_scheme) + , authority(_authority) + , path(_path) + , id(_id) + , data(_data) + { + } + + std::string scheme; + std::string authority; + std::string path; + int32_t id; + std::string data; +}; + +enum STATE_HTTP2 { + WAIT_SETTINGS, + SENDING_DATA +}; + +class HTTPSSession : public TCPSession +{ +public: + using log_send_cb = std::function; + using handshake_error_cb = std::function; + + HTTPSSession(std::shared_ptr handle, + TCPSession::malformed_data_cb malformed_data_handler, + TCPSession::got_dns_msg_cb got_dns_msg_handler, + TCPSession::connection_ready_cb connection_ready_handler, + handshake_error_cb handshake_error_handler, + Target target, + HTTPMethod method); + + ~HTTPSSession() override; + + virtual bool setup() override; + + virtual void on_connect_event() override; + + void send_tls(void *data, size_t len); + + void init_nghttp2(); + + void send_settings(); + + void receive_response(const char data[], size_t len); + + int session_send(); + + int session_receive(); + + void close() override; + + void receive_data(const char data[], size_t len) override; + + void write(std::unique_ptr data, size_t len) override; + + void process_receive(const uint8_t *data, size_t len); + + std::unique_ptr create_http2_stream_data(std::unique_ptr data, size_t len); + + void add_stream(http2_stream_data *stream_data); + + void remove_stream(http2_stream_data *stream_data); + + void settings_received(); + + std::unordered_map> _recv_chunks; + +protected: + void destroy_stream(); + + void destroy_session(); + + void do_handshake(); + +private: + STATE_HTTP2 http2_state; + malformed_data_cb _malformed_data; + got_dns_msg_cb _got_dns_msg; + std::shared_ptr _handle; + enum class LinkState { + HANDSHAKE, + DATA, + CLOSE + } _tls_state; + handshake_error_cb _handshake_error; + Target _target; + HTTPMethod _method; + + nghttp2_session *_current_session; +// std::string _pull_buffer; + + SSL *_ssl_session; + SSL_CTX *_ssl_context; + BIO *_read_bio; + BIO *_write_bio; + void flush_read_bio(); +}; diff --git a/libs/visor_http_client/tcpsession.cpp b/libs/visor_http_client/tcpsession.cpp new file mode 100644 index 000000000..1963ccce2 --- /dev/null +++ b/libs/visor_http_client/tcpsession.cpp @@ -0,0 +1,88 @@ + +#include "tcpsession.h" + +#include +#include +#include + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#endif +#include +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + +TCPSession::TCPSession(std::shared_ptr handle, + malformed_data_cb malformed_data_handler, + got_dns_msg_cb got_dns_msg_handler, + connection_ready_cb connection_ready_handler) + : _handle{std::move(handle)} + , _malformed_data{std::move(malformed_data_handler)} + , _got_dns_msg{std::move(got_dns_msg_handler)} + , _connection_ready{std::move(connection_ready_handler)} +{ +} + +// do any pre-connection setup, return true if all OK. +bool TCPSession::setup() +{ + return true; +} + +void TCPSession::on_connect_event() +{ + _connection_ready(); +} + +// remote peer closed connection +void TCPSession::on_end_event() +{ + _handle->close(); +} + +// all local writes now finished +void TCPSession::on_shutdown_event() +{ + _handle->close(); +} + +// gracefully terminate the session +void TCPSession::close() +{ + _handle->stop(); + _handle->shutdown(); +} + +// accumulate data and try to extract DNS messages +void TCPSession::receive_data(const char data[], size_t len) +{ + _buffer.append(data, len); + + for (;;) { + std::uint16_t size; + + if (_buffer.size() < sizeof(size)) + break; + + // size is in network byte order. + size = static_cast(_buffer[1]) | static_cast(_buffer[0]) << 8; + + if (_buffer.size() >= sizeof(size) + size) { + auto data = std::make_unique(size); + std::memcpy(data.get(), _buffer.data() + sizeof(size), size); + _buffer.erase(0, sizeof(size) + size); + _got_dns_msg(std::move(data), size); + } else { + // Nope, we need more data. + break; + } + } +} + +// send data, giving data ownership to async library +void TCPSession::write(std::unique_ptr data, size_t len) +{ + _handle->write(std::move(data), len); +} diff --git a/libs/visor_http_client/tcpsession.h b/libs/visor_http_client/tcpsession.h new file mode 100644 index 000000000..490e8adfd --- /dev/null +++ b/libs/visor_http_client/tcpsession.h @@ -0,0 +1,40 @@ +#pragma once + +#include +#include +#include + +namespace uvw { +class tcp_handle; +} + +class TCPSession +{ +public: + using malformed_data_cb = std::function; + using got_dns_msg_cb = std::function data, size_t size)>; + using connection_ready_cb = std::function; + + TCPSession(std::shared_ptr handle, + malformed_data_cb malformed_data_handler, + got_dns_msg_cb got_dns_msg_handler, + connection_ready_cb connection_ready_handler); + virtual ~TCPSession() = default; + + virtual bool setup(); + + virtual void on_connect_event(); + virtual void on_end_event(); + virtual void on_shutdown_event(); + + virtual void close(); + virtual void receive_data(const char data[], size_t len); + virtual void write(std::unique_ptr data, size_t len); + +private: + std::string _buffer; + std::shared_ptr _handle; + malformed_data_cb _malformed_data; + got_dns_msg_cb _got_dns_msg; + connection_ready_cb _connection_ready; +}; diff --git a/libs/visor_http_client/test_driver.cpp b/libs/visor_http_client/test_driver.cpp new file mode 100644 index 000000000..50963e056 --- /dev/null +++ b/libs/visor_http_client/test_driver.cpp @@ -0,0 +1,188 @@ +#include + +#include +#define CPPHTTPLIB_OPENSSL_SUPPORT +#include + +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" +#endif +#include +#include +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif + +#include "httpssession.h" + +void connect_tcp_events(std::shared_ptr tcp_handle, std::shared_ptr tcp_session) +{ + /** SOCKET CALLBACKS **/ + + // SOCKET: local socket was closed, cleanup resources and possibly restart another connection + tcp_handle->on([](uvw::close_event &, uvw::tcp_handle &handle) { + std::cerr << "close_event" << std::endl; + handle.stop(); + }); + + // SOCKET: socket error + tcp_handle->on([](uvw::error_event &event, uvw::tcp_handle &handle) { + std::cerr << "error_event: " << handle.sock().ip << ":" << handle.sock().port << " - " << event.what() << std::endl; + handle.close(); + }); + + // INCOMING: remote peer closed connection, EOF + tcp_handle->on([tcp_session](uvw::end_event &, uvw::tcp_handle &) { + std::cerr << "end_event" << std::endl; + tcp_session->on_end_event(); + }); + + // OUTGOING: we've finished writing all our data and are shutting down + tcp_handle->on([tcp_session](uvw::shutdown_event &, uvw::tcp_handle &) { + std::cerr << "shutdown_event" << std::endl; + tcp_session->on_shutdown_event(); + }); + + // INCOMING: remote peer sends data, pass to session + tcp_handle->on([tcp_session](uvw::data_event &event, uvw::tcp_handle &) { + std::cerr << "data_event" << std::endl; + tcp_session->receive_data(event.data.get(), event.length); + }); + + // OUTGOING: write operation has finished + tcp_handle->on([](uvw::write_event &, uvw::tcp_handle &) { + std::cerr << "WriteEvent" << std::endl; + }); + + // SOCKET: on connect + tcp_handle->on([tcp_session](uvw::connect_event &, uvw::tcp_handle &handle) { + std::cerr << "ConnectEvent" << std::endl; + // start reading from incoming stream, fires data_event when receiving + handle.read(); + tcp_session->on_connect_event(); + + }); +} + +TEST_CASE("HTTP Client", "[http]") +{ + auto loop = uvw::loop::get_default(); + + auto family = AF_INET; + + auto svr = std::make_unique( + "/tmp/cacert.pem", + "/tmp/cakey.pem"); + if (!svr->is_valid()) { + std::cerr << "could not create test server" << std::endl; + return; + } + auto svr_port = svr->bind_to_any_port("127.0.0.1"); + if (svr_port <= 0) { + std::cerr << "could not bind test server" << std::endl; + return; + } else { + std::cerr << "tls test server started on 127.0.0.1:" << svr_port << std::endl; + } + + auto svr_thread = std::make_unique([&svr] { + svr->listen_after_bind(); + }); + + std::vector target_list; + std::vector raw_target_list; + raw_target_list.emplace_back("https://127.0.0.1:" + std::to_string(svr_port)); + auto request = loop->resource(); + for (const auto &i : raw_target_list) { + uvw::socket_address addr; + struct http_parser_url parsed = {}; + std::string url = i; + if (url.rfind("https://", 0) != 0) { + url.insert(0, "https://"); + } + int ret = http_parser_parse_url(url.c_str(), strlen(url.c_str()), 0, &parsed); + if (ret != 0) { + std::cerr << "could not parse url: " << url << std::endl; + } + std::string authority(&url[parsed.field_data[UF_HOST].off], parsed.field_data[UF_HOST].len); + std::string port; + if (parsed.field_data[UF_PORT].len) { + port = std::string(&url[parsed.field_data[UF_PORT].off], parsed.field_data[UF_PORT].len); + } + + auto target_resolved = request->addr_info_sync(authority, port); + if (!target_resolved.first) { + std::cerr << "unable to resolve target address: " << authority << std::endl; + if (i == "file") { + std::cerr << "(did you mean to include --targets?)" << std::endl; + } + } + addrinfo *node{target_resolved.second.get()}; + while (node && node->ai_family != family) { + node = node->ai_next; + } + if (!node) { + std::cerr << "name did not resolve to valid IP address for this inet family: " << i + << std::endl; + continue; + } + + if (family == AF_INET) { + char buffer[INET_ADDRSTRLEN]; + inet_ntop(AF_INET, &reinterpret_cast(node->ai_addr)->sin_addr, buffer, INET_ADDRSTRLEN); + addr.ip = buffer; + } else if (family == AF_INET6) { + char buffer[INET6_ADDRSTRLEN]; + inet_ntop(AF_INET6, &reinterpret_cast(node->ai_addr)->sin6_addr, buffer, INET6_ADDRSTRLEN); + addr.ip = buffer; + } + target_list.push_back({&parsed, addr.ip, url, port}); + } + + if (!target_list.size()) { + std::cerr << "no targets resolved" << std::endl; + return; + } + + // --- + + std::shared_ptr tcp_session; + auto tcp_handle = loop->resource(family); + + auto malformed_data = [tcp_handle]() { + std::cerr << "malformed_data or handshake error" << std::endl; + tcp_handle->close(); + }; + auto got_dns_message = []([[maybe_unused]] std::unique_ptr data, + [[maybe_unused]] size_t size) { + std::cerr << "got_dns_message" << std::endl; + // process_wire(data.get(), size); + }; + auto connection_ready = [tcp_session]() { + /** SEND DATA **/ + std::cerr << "connection_ready" << std::endl; + // tcp_session->write(std::move(std::get<0>(qt)), std::get<1>(qt)); + }; + + tcp_session = std::make_shared(tcp_handle, + malformed_data, + got_dns_message, + connection_ready, + malformed_data, + target_list[0], + HTTPMethod::GET); + connect_tcp_events(tcp_handle, tcp_session); + if (!tcp_session->setup()) { + std::cerr << "setup failed" << std::endl; + } + std::cerr << "connecting to " << target_list[0].address << ":" << target_list[0].port << std::endl; + tcp_handle->connect(target_list[0].address, std::stoul(target_list[0].port)); + + // ---- + CHECK(loop->run() == 0); + loop = nullptr; + svr->stop(); + svr_thread->join(); + +} diff --git a/libs/visor_http_client/url_parser.c b/libs/visor_http_client/url_parser.c new file mode 100644 index 000000000..4912ee206 --- /dev/null +++ b/libs/visor_http_client/url_parser.c @@ -0,0 +1,652 @@ +/* Copyright Joyent, Inc. and other Node contributors. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#include "url_parser.h" +#include +#include +#include +#include +#include + +#ifndef BIT_AT +# define BIT_AT(a, i) \ + (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \ + (1 << ((unsigned int) (i) & 7)))) +#endif + +#if HTTP_PARSER_STRICT +# define T(v) 0 +#else +# define T(v) v +#endif + +static const uint8_t normal_url_char[32] = { +/* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ + 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, +/* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ + 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0, +/* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ + 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, +/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ + 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, +/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */ + 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128, +/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, +/* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, }; + +#undef T + +enum state + { s_dead = 1 /* important that this is > 0 */ + + , s_start_req_or_res + , s_res_or_resp_H + , s_start_res + , s_res_H + , s_res_HT + , s_res_HTT + , s_res_HTTP + , s_res_http_major + , s_res_http_dot + , s_res_http_minor + , s_res_http_end + , s_res_first_status_code + , s_res_status_code + , s_res_status_start + , s_res_status + , s_res_line_almost_done + + , s_start_req + + , s_req_method + , s_req_spaces_before_url + , s_req_schema + , s_req_schema_slash + , s_req_schema_slash_slash + , s_req_server_start + , s_req_server + , s_req_server_with_at + , s_req_path + , s_req_query_string_start + , s_req_query_string + , s_req_fragment_start + , s_req_fragment + , s_req_http_start + , s_req_http_H + , s_req_http_HT + , s_req_http_HTT + , s_req_http_HTTP + , s_req_http_I + , s_req_http_IC + , s_req_http_major + , s_req_http_dot + , s_req_http_minor + , s_req_http_end + , s_req_line_almost_done + + , s_header_field_start + , s_header_field + , s_header_value_discard_ws + , s_header_value_discard_ws_almost_done + , s_header_value_discard_lws + , s_header_value_start + , s_header_value + , s_header_value_lws + + , s_header_almost_done + + , s_chunk_size_start + , s_chunk_size + , s_chunk_parameters + , s_chunk_size_almost_done + + , s_headers_almost_done + , s_headers_done + + /* Important: 's_headers_done' must be the last 'header' state. All + * states beyond this must be 'body' states. It is used for overflow + * checking. See the PARSING_HEADER() macro. + */ + + , s_chunk_data + , s_chunk_data_almost_done + , s_chunk_data_done + + , s_body_identity + , s_body_identity_eof + + , s_message_done + }; + +enum http_host_state + { + s_http_host_dead = 1 + , s_http_userinfo_start + , s_http_userinfo + , s_http_host_start + , s_http_host_v6_start + , s_http_host + , s_http_host_v6 + , s_http_host_v6_end + , s_http_host_v6_zone_start + , s_http_host_v6_zone + , s_http_host_port_start + , s_http_host_port +}; + +/* Macros for character classes; depends on strict-mode */ +#define CR '\r' +#define LF '\n' +#define LOWER(c) (unsigned char)(c | 0x20) +#define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z') +#define IS_NUM(c) ((c) >= '0' && (c) <= '9') +#define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c)) +#define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f')) +#define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \ + (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \ + (c) == ')') +#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \ + (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \ + (c) == '$' || (c) == ',') + +#define STRICT_TOKEN(c) ((c == ' ') ? 0 : tokens[(unsigned char)c]) + +#if HTTP_PARSER_STRICT +#define TOKEN(c) STRICT_TOKEN(c) +#define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c)) +#define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-') +#else +#define TOKEN(c) tokens[(unsigned char)c] +#define IS_URL_CHAR(c) \ + (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80)) +#define IS_HOST_CHAR(c) \ + (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_') +#endif + +/* Our URL parser. + * + * This is designed to be shared by http_parser_execute() for URL validation, + * hence it has a state transition + byte-for-byte interface. In addition, it + * is meant to be embedded in http_parser_parse_url(), which does the dirty + * work of turning state transitions URL components for its API. + * + * This function should only be invoked with non-space characters. It is + * assumed that the caller cares about (and can detect) the transition between + * URL and non-URL states by looking for these. + */ +static enum state +parse_url_char(enum state s, const char ch) +{ + if (ch == ' ' || ch == '\r' || ch == '\n') { + return s_dead; + } + +#if HTTP_PARSER_STRICT + if (ch == '\t' || ch == '\f') { + return s_dead; + } +#endif + + switch (s) { + case s_req_spaces_before_url: + /* Proxied requests are followed by scheme of an absolute URI (alpha). + * All methods except CONNECT are followed by '/' or '*'. + */ + + if (ch == '/' || ch == '*') { + return s_req_path; + } + + if (IS_ALPHA(ch)) { + return s_req_schema; + } + + break; + + case s_req_schema: + if (IS_ALPHA(ch)) { + return s; + } + + if (ch == ':') { + return s_req_schema_slash; + } + + break; + + case s_req_schema_slash: + if (ch == '/') { + return s_req_schema_slash_slash; + } + + break; + + case s_req_schema_slash_slash: + if (ch == '/') { + return s_req_server_start; + } + + break; + + case s_req_server_with_at: + if (ch == '@') { + return s_dead; + } + + /* fall through */ + case s_req_server_start: + case s_req_server: + if (ch == '/') { + return s_req_path; + } + + if (ch == '?') { + return s_req_query_string_start; + } + + if (ch == '@') { + return s_req_server_with_at; + } + + if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') { + return s_req_server; + } + + break; + + case s_req_path: + if (IS_URL_CHAR(ch)) { + return s; + } + + switch (ch) { + case '?': + return s_req_query_string_start; + + case '#': + return s_req_fragment_start; + } + + break; + + case s_req_query_string_start: + case s_req_query_string: + if (IS_URL_CHAR(ch)) { + return s_req_query_string; + } + + switch (ch) { + case '?': + /* allow extra '?' in query string */ + return s_req_query_string; + + case '#': + return s_req_fragment_start; + } + + break; + + case s_req_fragment_start: + if (IS_URL_CHAR(ch)) { + return s_req_fragment; + } + + switch (ch) { + case '?': + return s_req_fragment; + + case '#': + return s; + } + + break; + + case s_req_fragment: + if (IS_URL_CHAR(ch)) { + return s; + } + + switch (ch) { + case '?': + case '#': + return s; + } + + break; + + default: + break; + } + + /* We should never fall out of the switch above unless there's an error */ + return s_dead; +} + +static enum http_host_state +http_parse_host_char(enum http_host_state s, const char ch) { + switch(s) { + case s_http_userinfo: + case s_http_userinfo_start: + if (ch == '@') { + return s_http_host_start; + } + + if (IS_USERINFO_CHAR(ch)) { + return s_http_userinfo; + } + break; + + case s_http_host_start: + if (ch == '[') { + return s_http_host_v6_start; + } + + if (IS_HOST_CHAR(ch)) { + return s_http_host; + } + + break; + + case s_http_host: + if (IS_HOST_CHAR(ch)) { + return s_http_host; + } + + /* fall through */ + case s_http_host_v6_end: + if (ch == ':') { + return s_http_host_port_start; + } + + break; + + case s_http_host_v6: + if (ch == ']') { + return s_http_host_v6_end; + } + + /* fall through */ + case s_http_host_v6_start: + if (IS_HEX(ch) || ch == ':' || ch == '.') { + return s_http_host_v6; + } + + if (s == s_http_host_v6 && ch == '%') { + return s_http_host_v6_zone_start; + } + break; + + case s_http_host_v6_zone: + if (ch == ']') { + return s_http_host_v6_end; + } + + /* fall through */ + case s_http_host_v6_zone_start: + /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */ + if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' || + ch == '~') { + return s_http_host_v6_zone; + } + break; + + case s_http_host_port: + case s_http_host_port_start: + if (IS_NUM(ch)) { + return s_http_host_port; + } + + break; + + default: + break; + } + return s_http_host_dead; +} + +static int +http_parse_host(const char * buf, struct http_parser_url *u, int found_at) { + enum http_host_state s; + + const char *p; + size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len; + + assert(u->field_set & (1 << UF_HOST)); + + u->field_data[UF_HOST].len = 0; + + s = found_at ? s_http_userinfo_start : s_http_host_start; + + for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) { + enum http_host_state new_s = http_parse_host_char(s, *p); + + if (new_s == s_http_host_dead) { + return 1; + } + + switch(new_s) { + case s_http_host: + if (s != s_http_host) { + u->field_data[UF_HOST].off = (uint16_t)(p - buf); + } + u->field_data[UF_HOST].len++; + break; + + case s_http_host_v6: + if (s != s_http_host_v6) { + u->field_data[UF_HOST].off = (uint16_t)(p - buf); + } + u->field_data[UF_HOST].len++; + break; + + case s_http_host_v6_zone_start: + case s_http_host_v6_zone: + u->field_data[UF_HOST].len++; + break; + + case s_http_host_port: + if (s != s_http_host_port) { + u->field_data[UF_PORT].off = (uint16_t)(p - buf); + u->field_data[UF_PORT].len = 0; + u->field_set |= (1 << UF_PORT); + } + u->field_data[UF_PORT].len++; + break; + + case s_http_userinfo: + if (s != s_http_userinfo) { + u->field_data[UF_USERINFO].off = (uint16_t)(p - buf); + u->field_data[UF_USERINFO].len = 0; + u->field_set |= (1 << UF_USERINFO); + } + u->field_data[UF_USERINFO].len++; + break; + + default: + break; + } + s = new_s; + } + + /* Make sure we don't end somewhere unexpected */ + switch (s) { + case s_http_host_start: + case s_http_host_v6_start: + case s_http_host_v6: + case s_http_host_v6_zone_start: + case s_http_host_v6_zone: + case s_http_host_port_start: + case s_http_userinfo: + case s_http_userinfo_start: + return 1; + default: + break; + } + + return 0; +} + +void +http_parser_url_init(struct http_parser_url *u) { + memset(u, 0, sizeof(*u)); +} + +int +http_parser_parse_url(const char *buf, size_t buflen, int is_connect, + struct http_parser_url *u) +{ + enum state s; + const char *p; + enum http_parser_url_fields uf, old_uf; + int found_at = 0; + + if (buflen == 0) { + return 1; + } + + u->port = u->field_set = 0; + s = is_connect ? s_req_server_start : s_req_spaces_before_url; + old_uf = UF_MAX; + + for (p = buf; p < buf + buflen; p++) { + s = parse_url_char(s, *p); + + /* Figure out the next field that we're operating on */ + switch (s) { + case s_dead: + return 1; + + /* Skip delimeters */ + case s_req_schema_slash: + case s_req_schema_slash_slash: + case s_req_server_start: + case s_req_query_string_start: + case s_req_fragment_start: + continue; + + case s_req_schema: + uf = UF_SCHEMA; + break; + + case s_req_server_with_at: + found_at = 1; + + /* fall through */ + case s_req_server: + uf = UF_HOST; + break; + + case s_req_path: + uf = UF_PATH; + break; + + case s_req_query_string: + uf = UF_QUERY; + break; + + case s_req_fragment: + uf = UF_FRAGMENT; + break; + + default: + assert(!"Unexpected state"); + return 1; + } + + /* Nothing's changed; soldier on */ + if (uf == old_uf) { + u->field_data[uf].len++; + continue; + } + + u->field_data[uf].off = (uint16_t)(p - buf); + u->field_data[uf].len = 1; + + u->field_set |= (1 << uf); + old_uf = uf; + } + + /* host must be present if there is a schema */ + /* parsing http:///toto will fail */ + if ((u->field_set & (1 << UF_SCHEMA)) && + (u->field_set & (1 << UF_HOST)) == 0) { + return 1; + } + + if (u->field_set & (1 << UF_HOST)) { + if (http_parse_host(buf, u, found_at) != 0) { + return 1; + } + } + + /* CONNECT requests can only contain "hostname:port" */ + if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) { + return 1; + } + + if (u->field_set & (1 << UF_PORT)) { + uint16_t off; + uint16_t len; + const char* p; + const char* end; + unsigned long v; + + off = u->field_data[UF_PORT].off; + len = u->field_data[UF_PORT].len; + end = buf + off + len; + + /* NOTE: The characters are already validated and are in the [0-9] range */ + assert(off + len <= buflen && "Port number overflow"); + v = 0; + for (p = buf + off; p < end; p++) { + v *= 10; + v += *p - '0'; + + /* Ports have a max value of 2^16 */ + if (v > 0xffff) { + return 1; + } + } + + u->port = (uint16_t) v; + } + + return 0; +} diff --git a/libs/visor_http_client/url_parser.h b/libs/visor_http_client/url_parser.h new file mode 100644 index 000000000..78b3096c5 --- /dev/null +++ b/libs/visor_http_client/url_parser.h @@ -0,0 +1,94 @@ +/* Copyright Joyent, Inc. and other Node contributors. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef url_parser_h +#define url_parser_h +#ifdef __cplusplus +extern "C" { +#endif + +/* Also update SONAME in the Makefile whenever you change these. */ +#define HTTP_PARSER_VERSION_MAJOR 2 +#define HTTP_PARSER_VERSION_MINOR 9 +#define HTTP_PARSER_VERSION_PATCH 1 + +#include +#if defined(_WIN32) && !defined(__MINGW32__) && \ + (!defined(_MSC_VER) || _MSC_VER<1600) && !defined(__WINE__) +#include +typedef __int8 int8_t; +typedef unsigned __int8 uint8_t; +typedef __int16 int16_t; +typedef unsigned __int16 uint16_t; +typedef __int32 int32_t; +typedef unsigned __int32 uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +#else +#include +#endif + +/* Compile with -DHTTP_PARSER_STRICT=0 to make less checks, but run + * faster + */ +#ifndef HTTP_PARSER_STRICT +# define HTTP_PARSER_STRICT 1 +#endif + +enum http_parser_url_fields + { UF_SCHEMA = 0 + , UF_HOST = 1 + , UF_PORT = 2 + , UF_PATH = 3 + , UF_QUERY = 4 + , UF_FRAGMENT = 5 + , UF_USERINFO = 6 + , UF_MAX = 7 + }; + + +/* Result structure for http_parser_parse_url(). + * + * Callers should index into field_data[] with UF_* values iff field_set + * has the relevant (1 << UF_*) bit set. As a courtesy to clients (and + * because we probably have padding left over), we convert any port to + * a uint16_t. + */ +struct http_parser_url { + uint16_t field_set; /* Bitmask of (1 << UF_*) values */ + uint16_t port; /* Converted UF_PORT string */ + + struct { + uint16_t off; /* Offset into buffer in which field starts */ + uint16_t len; /* Length of run in buffer */ + } field_data[UF_MAX]; +}; + +/* Initialize all http_parser_url members to 0 */ +void http_parser_url_init(struct http_parser_url *u); + +/* Parse a URL; return nonzero on failure */ +int http_parser_parse_url(const char *buf, size_t buflen, + int is_connect, + struct http_parser_url *u); +#ifdef __cplusplus +} +#endif +#endif