From d8e37ec9d50874f0112193d29b1540fad56857a0 Mon Sep 17 00:00:00 2001 From: Mohammad Nejati Date: Tue, 16 Dec 2025 13:15:10 +0000 Subject: [PATCH] feat: multipart_form_sink --- .../boost/http_proto/multipart_form_sink.hpp | 90 ++++++ .../rfc/content_disposition_rule.hpp | 74 +++++ include/boost/http_proto/rfc/parameter.hpp | 42 +++ .../http_proto/rfc/quoted_token_view.hpp | 4 - src/multipart_form_sink.cpp | 300 ++++++++++++++++++ src/rfc/content_disposition_rule.cpp | 37 +++ src/rfc/parameter.cpp | 48 ++- src/rfc/quoted_token_rule.cpp | 4 +- test/unit/multipart_form_sink.cpp | 54 ++++ test/unit/rfc/content_disposition_rule.cpp | 96 ++++++ test/unit/rfc/quoted_token_rule.cpp | 50 ++- 11 files changed, 780 insertions(+), 19 deletions(-) create mode 100644 include/boost/http_proto/multipart_form_sink.hpp create mode 100644 include/boost/http_proto/rfc/content_disposition_rule.hpp create mode 100644 src/multipart_form_sink.cpp create mode 100644 src/rfc/content_disposition_rule.cpp create mode 100644 test/unit/multipart_form_sink.cpp create mode 100644 test/unit/rfc/content_disposition_rule.cpp diff --git a/include/boost/http_proto/multipart_form_sink.hpp b/include/boost/http_proto/multipart_form_sink.hpp new file mode 100644 index 00000000..d1b0a509 --- /dev/null +++ b/include/boost/http_proto/multipart_form_sink.hpp @@ -0,0 +1,90 @@ +// +// Copyright (c) 2025 Mohammad Nejati +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/cppalliance/http_proto +// + +#ifndef BOOST_HTTP_PROTO_MULTIPART_FORM_SINK_HPP +#define BOOST_HTTP_PROTO_MULTIPART_FORM_SINK_HPP + +#include +#include + +#include +#include + +#include + +namespace boost { +namespace http_proto { + +/** A sink which parses a multipart/form-data body + + The written data is searched for the delimiter + CRLF "--" boundary. The header of every part must + contain a Content-Disposition field of type + `form-data` with a non-empty `name` parameter, + otherwise the write fails with error::bad_payload. + The content of a part with a `filename` parameter + is written to a file, the content of any other + part is accumulated in memory. +*/ +class BOOST_HTTP_PROTO_SYMBOL_VISIBLE multipart_form_sink + : public http_proto::sink +{ +public: + /** Content of a part which has a `filename` parameter + + `name` is the unquoted value of the parameter and + `path` is the path the file was opened with, which + currently is the same string. + */ + struct file_field + { + std::string name; + std::string path; + }; + + /** Content of a part without a `filename` parameter + */ + struct text_field + { + std::string data; + }; + + /** A part of the form + + `name` is the unquoted `name` parameter of the + Content-Disposition field and `content_type` is + the value of the Content-Type field, if the part + has one. + */ + struct part + { + std::string name; + variant2::variant content; + boost::optional content_type; + }; + + /** Constructor + + @param boundary The boundary of the body, without + the two hyphens which precede it in a delimiter. + */ + BOOST_HTTP_PROTO_DECL + explicit + multipart_form_sink( + core::string_view boundary); + + /** Return the parts whose header has been parsed so far + */ + BOOST_HTTP_PROTO_DECL + boost::span + parts() const noexcept; + +private: + // Searches `b` for delimiters and passes the data + // between them to parse(). + BOOST_HTTP_PROTO_DECL + results + on_write( + buffers::const_buffer b, + bool more) override; + + // Advances the state machine with `b`. `match` is + // true when a delimiter directly follows `b`. + void + parse( + bool match, + core::string_view b, + system::error_code& ec); + + // preamble: before the first delimiter + // post_boundary0..2: the characters after a delimiter, + // CRLF begins a part and "--" ends the body + // header: collecting the header of a part + // content: receiving the content of a part + // finished: the closing delimiter has been seen + enum class state + { + preamble, + post_boundary0, + post_boundary1, + post_boundary2, + header, + content, + finished + }; + + state state_ = state::preamble; + std::string needle_; // CRLF "--" boundary + std::string leftover_; // end of the previous input which may begin a delimiter + std::string header_; // header of the current part + capy::file file_; // receives the content of a file part + std::vector parts_; +}; + +} // http_proto +} // boost + +#endif diff --git a/include/boost/http_proto/rfc/content_disposition_rule.hpp b/include/boost/http_proto/rfc/content_disposition_rule.hpp new file mode 100644 index 00000000..acccfc5f --- /dev/null +++ b/include/boost/http_proto/rfc/content_disposition_rule.hpp @@ -0,0 +1,74 @@ +// +// Copyright (c) 2025 Mohammad Nejati +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/cppalliance/http_proto +// + +#ifndef BOOST_HTTP_PROTO_RFC_CONTENT_DISPOSITION_RULE_HPP +#define BOOST_HTTP_PROTO_RFC_CONTENT_DISPOSITION_RULE_HPP + +#include +#include +#include + +namespace boost { +namespace http_proto { + +namespace implementation_defined { +// Type of content_disposition_rule. Parses a disposition-type +// token followed by zero or more parameters. +struct content_disposition_rule_t +{ + struct value_type + { + core::string_view type; // the disposition-type token + grammar::range params; // the parameters, in order of appearance + }; + + BOOST_HTTP_PROTO_DECL + system::result + parse( + char const*& it, + char const* end) const noexcept; +}; +} // implementation_defined + +/** Rule matching content-disposition + + @par Value Type + @code + struct value_type + { + core::string_view type; + grammar::range< parameter > params; + }; + @endcode + + @par Example + @code + auto rv = grammar::parse( "inline; a=b", content_disposition_rule ); + @endcode + + @par BNF + @code + content-disposition = disposition-type *( OWS ";" OWS disposition-parm ) + + disposition-type = token + disposition-parm = token "=" ( token / quoted-string ) + @endcode + + @par Specification + @li 4.1. Grammar (rfc6266) + @li 3.2.6. Field Value Components (rfc7230) + + @see + @ref quoted_token_view. 
+*/ +BOOST_INLINE_CONSTEXPR implementation_defined::content_disposition_rule_t content_disposition_rule{}; + +} // http_proto +} // boost + +#endif diff --git a/include/boost/http_proto/rfc/parameter.hpp b/include/boost/http_proto/rfc/parameter.hpp index 8d4f917a..fa1f88b6 100644 --- a/include/boost/http_proto/rfc/parameter.hpp +++ b/include/boost/http_proto/rfc/parameter.hpp @@ -13,6 +13,7 @@ #include #include #include +#include namespace boost { namespace http_proto { @@ -48,6 +49,18 @@ struct parameter_rule_t char const*) const noexcept -> system::result; }; + +// Type of parameters_rule. Parses zero or more parameters, +// each one introduced by OWS ";" OWS. +struct parameters_rule_t +{ + using value_type = grammar::range; + + BOOST_HTTP_PROTO_DECL + auto + parse( + char const*&, + char const*) const noexcept -> + system::result; +}; } // implementation_defined /** Rule matching parameter @@ -75,6 +88,35 @@ struct parameter_rule_t */ BOOST_INLINE_CONSTEXPR implementation_defined::parameter_rule_t parameter_rule{}; +//------------------------------------------------ + +/** Rule matching parameters + + @par Value Type + @code + using value_type = grammar::range< parameter >; + @endcode + + @par Example + @code + auto rv = grammar::parse( ";a=b; c=d", parameters_rule ); + @endcode + + @par BNF + @code + parameters = *( OWS ";" OWS parameter ) + parameter = token "=" ( token / quoted-string ) + @endcode + + @par Specification + @li 3.1.1.1. Media Type (rfc7231) + + @see + @ref parameter, + @ref parameter_rule. 
+*/ +BOOST_INLINE_CONSTEXPR implementation_defined::parameters_rule_t parameters_rule{}; + } // http_proto } // boost diff --git a/include/boost/http_proto/rfc/quoted_token_view.hpp b/include/boost/http_proto/rfc/quoted_token_view.hpp index c31650ed..41a1df00 100644 --- a/include/boost/http_proto/rfc/quoted_token_view.hpp +++ b/include/boost/http_proto/rfc/quoted_token_view.hpp @@ -50,10 +50,6 @@ class quoted_token_view final : string_view_base(s) , n_(n) { - BOOST_ASSERT(s.size() >= 2); - BOOST_ASSERT(s.front() == '\"'); - BOOST_ASSERT(s.back() == '\"'); - BOOST_ASSERT(n_ <= s_.size() - 2); } public: diff --git a/src/multipart_form_sink.cpp b/src/multipart_form_sink.cpp new file mode 100644 index 00000000..4eb26edc --- /dev/null +++ b/src/multipart_form_sink.cpp @@ -0,0 +1,300 @@ +// +// Copyright (c) 2025 Mohammad Nejati +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/cppalliance/http_proto +// + +#include +#include + +#include "src/rfc/detail/rules.hpp" + +#include + +namespace boost { +namespace http_proto { + +namespace { +std::string +unquote(quoted_token_view qtv) +{ + if(qtv.has_escapes()) + return { qtv.begin(), qtv.end() }; + + auto rs = std::string{}; + for(auto it = qtv.begin(); it != qtv.end(); it++) + { + if(*it == '\\') + it++; + rs.push_back(*it); + } + return rs; +} +} // namespace + +multipart_form_sink:: +multipart_form_sink( + core::string_view boundary) +{ + leftover_.append("\r\n"); + needle_.append("\r\n--"); + needle_.append(boundary); +} + +auto +multipart_form_sink:: +parts() const noexcept + -> boost::span +{ + return parts_; +} + +auto +multipart_form_sink:: +on_write( + buffers::const_buffer b, + bool more) -> results +{ + system::error_code ec; + core::string_view sv( + static_cast(b.data()), b.size()); + + if(!leftover_.empty()) + { + for(std::size_t i = 0; i < leftover_.size(); 
++i) + { + core::string_view nd(needle_); + if(!nd.starts_with({ &leftover_[i], &*leftover_.cend() })) + continue; + nd.remove_prefix(leftover_.size() - i); + if(sv.size() >= nd.size()) + { + if(!sv.starts_with(nd)) + continue; + parse(true, { leftover_.data(), i }, ec); + if(ec.failed()) + goto upcall; + leftover_.clear(); + sv.remove_prefix(nd.size()); + goto loop_sv; + } + if(!more) + break; + if(!nd.starts_with(sv)) + continue; + parse(false, { leftover_.data(), i }, ec); + if(ec.failed()) + goto upcall; + leftover_.erase(0, i); + leftover_.append(sv); + return { ec , sv.size() }; + } + // leftover_ cannot contain a needle + parse(false, leftover_, ec); + if(ec.failed()) + goto upcall; + leftover_.clear(); + } + +loop_sv: + for(char const* it = sv.begin(); it != sv.end(); ++it) + { + if(*it == '\r') + { + core::string_view rm(it, sv.end()); + if(rm.size() >= needle_.size()) + { + if(std::equal( + needle_.rbegin(), + needle_.rend(), + rm.rend() - needle_.size())) + { + parse(true, { sv.begin(), it }, ec); + if(ec.failed()) + goto upcall; + sv = { it + needle_.size(), sv.end() }; + goto loop_sv; + } + continue; + } + if(!more) + break; + if(!core::string_view(needle_).starts_with(rm)) + continue; + parse(false, { sv.begin(), it }, ec); + if(ec.failed()) + goto upcall; + leftover_.append(it, sv.end()); + sv = {}; + goto upcall; + } + } + parse(false, sv, ec); + if(!ec.failed()) + sv= {}; + +upcall: + return { ec, b.size() - sv.size() }; +} + +void +multipart_form_sink:: +parse( + bool match, + core::string_view b, + system::error_code& ec) +{ +loop: + switch(state_) + { + case state::preamble: + if(match) + state_ = state::post_boundary0; + break; + case state::post_boundary0: + if(b.empty()) + break; + switch(b[0]) + { + case '\r': + state_ = state::post_boundary1; + break; + case '-': + state_ = state::post_boundary2; + break; + default: + ec = http_proto::error::bad_payload; + return; + } + b.remove_prefix(1); + goto loop; + case state::post_boundary1: + 
if(b.empty()) + break; + /* LF completes the CRLF after a delimiter, the header of a part follows */ if(b[0] != '\n') + { + ec = http_proto::error::bad_payload; + return; + } + b.remove_prefix(1); + state_ = state::header; + goto loop; + case state::post_boundary2: /* second hyphen of the closing delimiter */ + if(b.empty()) + break; + if(b[0] != '-') + { + ec = http_proto::error::bad_payload; + return; + } + b.remove_prefix(1); + state_ = state::finished; + break; + case state::header: + { /* input is collected in header_ until the empty line which ends the header is found */ + auto const s0 = header_.size(); + header_.append(b); + auto const pos = header_.find("\r\n\r\n"); + if(pos == std::string::npos) + break; + /* header_ is cut after the empty line and the bytes which belong to the header are removed from `b`, what remains in `b` is content */ header_.erase(pos + 4); + b.remove_prefix(header_.size() - s0); + + // parse fields + + auto const fields = grammar::parse( + header_, + grammar::range_rule(detail::field_rule)); + + if(!fields) + { + ec = http_proto::error::bad_payload; + return; + } + + part part{}; + + for(auto&& field : fields.value()) + { + if(field.has_obs_fold) + continue; // TODO + + if(grammar::ci_is_equal(field.name, "Content-Disposition")) + { + // parse Content-Disposition + auto cd = grammar::parse(field.value, content_disposition_rule); + if(!cd || !grammar::ci_is_equal(cd->type, "form-data")) + { + ec = http_proto::error::bad_payload; + return; + } + for(auto && param : cd->params) + { + if(grammar::ci_is_equal(param.name, "name")) + { + part.name = unquote(param.value); + } + else if(grammar::ci_is_equal(param.name, "filename")) + { + // a part with a filename becomes a file part + auto& ff = part.content.emplace(); + ff.name = unquote(param.value); + + // NOTE(review): `filename` is supplied by the + // sender and is used as the path without any + // validation, so it may name any location the + // process can write to. This has to be replaced + // before the sink is used on untrusted input. + // TODO + ff.path = ff.name; + file_.open( + ff.path.c_str(), + capy::file_mode::write_new, + ec); + if(ec.failed()) + return; + } + } + } + else if(grammar::ci_is_equal(field.name, "Content-Type")) + { + part.content_type = field.value; + } + } + + // a part without a `name` parameter is rejected, this + // includes a part without a Content-Disposition field + if(part.name.empty()) + { + ec = http_proto::error::bad_payload; + return; + } + + header_.clear(); + parts_.push_back(std::move(part)); + state_ = state::content; + // what is left in `b` already is content + BOOST_FALLTHROUGH; + } + case state::content: + // the content of a text part is appended in memory, + // the content of a file part is written to file_ + if(auto* p = variant2::get_if( + &parts_.back().content)) + { + p->data.append(b); + } + else + { + 
file_.write(b.data(), b.size(), ec); + if(ec.failed()) + return; + /* a delimiter follows, so the content of this part is complete */ if(match) + { + file_.close(ec); + if(ec.failed()) + return; + } + } + /* the next input begins with the characters after the delimiter */ if(match) + state_ = state::post_boundary0; + break; + case state::finished: + break; + } +} + +} // http_proto +} // boost diff --git a/src/rfc/content_disposition_rule.cpp b/src/rfc/content_disposition_rule.cpp new file mode 100644 index 00000000..4bdda7cb --- /dev/null +++ b/src/rfc/content_disposition_rule.cpp @@ -0,0 +1,37 @@ +// +// Copyright (c) 2025 Mohammad Nejati +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/cppalliance/http_proto +// + +#include +#include +#include + +namespace boost { +namespace http_proto { +namespace implementation_defined { +// Parses the disposition-type token and then the parameters. +// The error of a failing sub-rule is returned unchanged. +auto +content_disposition_rule_t:: +parse( + char const*& it, + char const* end) const noexcept-> + system::result +{ + auto type = grammar::parse(it, end, token_rule); + if(!type) + return type.error(); + + auto params = grammar::parse(it, end, parameters_rule); + if(!params) + return params.error(); + + return value_type{ *type, *params }; +} + +} // implementation_defined +} // http_proto +} // boost diff --git a/src/rfc/parameter.cpp b/src/rfc/parameter.cpp index 58104d73..e0468111 100644 --- a/src/rfc/parameter.cpp +++ b/src/rfc/parameter.cpp @@ -8,7 +8,14 @@ // #include +#include +#include +#include + #include +#include +#include +#include namespace boost { namespace http_proto { @@ -20,9 +27,44 @@ parse( char const* end) const noexcept -> system::result { - (void)it; - (void)end; - return system::error_code{}; + // parameter = token "=" ( token / quoted-string ) + auto name = grammar::parse(it, end, token_rule); + if(!name) + return name.error(); + + // the input ends after the name + if(it == end) + BOOST_HTTP_PROTO_RETURN_EC( + grammar::error::need_more); + + if(*it++ != '=') + BOOST_HTTP_PROTO_RETURN_EC( + grammar::error::mismatch); + + auto value = grammar::parse( + it, end, quoted_token_rule); 
+ if(!value) + return value.error(); + + return value_type{ *name, *value }; +} + +/* Parses *( OWS ";" OWS parameter ) into a range of parameters. + The optional whitespace and the semicolon are squelched, so the + elements of the range are the parameters alone. */ auto +parameters_rule_t:: +parse( + char const*& it, + char const* end) const noexcept -> + system::result +{ + constexpr auto ows = grammar::squelch( + grammar::optional_rule( + grammar::token_rule(detail::ws))); + + return grammar::parse( + it, end, grammar::range_rule( + grammar::tuple_rule( + ows, + grammar::squelch(grammar::literal_rule(";")), + ows, + parameter_rule))); } } // implementation_defined } // http_proto diff --git a/src/rfc/quoted_token_rule.cpp b/src/rfc/quoted_token_rule.cpp index 0d766361..dc0436bb 100644 --- a/src/rfc/quoted_token_rule.cpp +++ b/src/rfc/quoted_token_rule.cpp @@ -81,7 +81,7 @@ parse( return rv.error(); } // quoted-string + // the returned view begins after the opening DQUOTE - auto const it0 = it++; + auto const it0 = ++it; std::size_t n = 0; for(;;) { @@ -116,7 +116,7 @@ parse( ++n; } + // the view ends in front of the closing DQUOTE, which + // is skipped afterwards return value_type(core::string_view( - it0, ++it - it0), n); + it0, it++ - it0), n); } } // implementation_defined diff --git a/test/unit/multipart_form_sink.cpp b/test/unit/multipart_form_sink.cpp new file mode 100644 index 00000000..f1051f97 --- /dev/null +++ b/test/unit/multipart_form_sink.cpp @@ -0,0 +1,54 @@ +// +// Copyright (c) 2025 Mohammad Nejati +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/cppalliance/http_proto +// + +// Test that header file is self-contained. 
+#include + +#include "test_suite.hpp" + +#include + +namespace boost { +namespace http_proto { + +/* Writes a complete body with a single text part, delimited by + the boundary "-X", in one call and checks that no error is + reported. The file part and the byte-by-byte variant are + commented out. */ struct multipart_form_sink_test +{ + void run() + { + core::string_view cs = + "---X\r\n" + "Content-Disposition: form-data; name=\"username\"\r\n" + "\r\n" + "alice\r\n" + // "---X\r\n" + // "Content-Disposition: form-data; name=\"file\"; filename=\"hello.txt\"\r\n" + // "Content-Type: text/plain\r\n" + // "\r\n" + // "Hello world!\r\n" + "---X--\r\n"; + + multipart_form_sink mfs1("-X"); + // multipart_form_sink mfs2("-X"); + auto rv = mfs1.write(buffers::const_buffer{ cs.data(), cs.size()}, false); + BOOST_TEST(!rv.ec.failed()); + // for(auto& c : cs) + // { + // mfs2.write( + // buffers::const_buffer{ &c, 1}, + // &c != &cs[cs.size() - 1]); + // } + } +}; + +TEST_SUITE( + multipart_form_sink_test, + "boost.http_proto.multipart_form_sink"); + +} // http_proto +} // boost diff --git a/test/unit/rfc/content_disposition_rule.cpp b/test/unit/rfc/content_disposition_rule.cpp new file mode 100644 index 00000000..8e62401b --- /dev/null +++ b/test/unit/rfc/content_disposition_rule.cpp @@ -0,0 +1,96 @@ +// +// Copyright (c) 2025 Mohammad Nejati +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/cppalliance/http_proto +// + +// Test that header file is self-contained. +#include + +#include + +#include "test_suite.hpp" + +namespace boost { +namespace http_proto { + +// Checks which inputs content_disposition_rule accepts and +// what it produces for them. +struct content_disposition_rule_test +{ + // `s` has to be rejected + void + bad(core::string_view s) + { + auto rv = grammar::parse( + s, content_disposition_rule); + BOOST_TEST(rv.has_error()); + } + + // `s` has to produce `type` and exactly the parameters + // of `init`, in the same order + void + ok( + core::string_view s, + core::string_view type, + std::initializer_list< + std::pair> init) + { + auto rv = grammar::parse( + s, content_disposition_rule); + if(! 
BOOST_TEST( + cd.params.size() == init.size())) + return; + /* the parameters are compared one by one with the expected name and value */ auto it = cd.params.begin(); + for(std::size_t i = 0; i < init.size(); ++i) + { + auto param = *it++; + BOOST_TEST( + param.name == init.begin()[i].first); + BOOST_TEST( + param.value == init.begin()[i].second); + } + } + + void + testParse() + { + /* inputs which have to be rejected */ bad(""); + bad(" "); + bad(" inline"); + bad("inline "); + bad(";"); + bad("inline;"); + bad("inline; "); + bad("inline; ;"); + bad("inline; a"); + bad("inline; a="); + bad("inline; =b"); + bad("inline; a=b;"); + + /* inputs which have to be accepted. NOTE(review): several of the inputs below read identically in this listing, presumably they differ in the kind or amount of whitespace - confirm against the repository */ ok("inline;a=b", "inline", {{"a", "b"}}); + ok("inline;a=b;c=d", "inline", {{"a", "b"}, {"c", "d"}}); + ok("inline; a=b", "inline", {{"a", "b"}}); + ok("inline ;a=b; c=d", "inline", {{"a", "b"}, {"c", "d"}}); + ok("inline; a=b", "inline", {{"a", "b"}}); + ok("inline; a=b", "inline", {{"a", "b"}}); + ok("inline ;a=b", "inline", {{"a", "b"}}); + ok("inline ; a=b", "inline", {{"a", "b"}}); + ok("inline ; a=b", "inline", {{"a", "b"}}); + } + + void + run() + { + testParse(); + } +}; + +TEST_SUITE( + content_disposition_rule_test, + "boost.http_proto.content_disposition_rule"); + +} // http_proto +} // boost diff --git a/test/unit/rfc/quoted_token_rule.cpp b/test/unit/rfc/quoted_token_rule.cpp index e70e516c..efd474d0 100644 --- a/test/unit/rfc/quoted_token_rule.cpp +++ b/test/unit/rfc/quoted_token_rule.cpp @@ -18,25 +18,55 @@ namespace http_proto { struct quoted_token_rule_test { + // `s` has to be rejected void - run() + bad(core::string_view s) + { + http_proto::bad(quoted_token_rule, s); + } + + // `s` has to produce the view `r`, `escapes` is the + // number of escaping backslashes contained in `r` + void + ok( + core::string_view s, + core::string_view r, + std::size_t escapes = 0) { - auto const& t = quoted_token_rule; + auto rv = grammar::parse( + s, quoted_token_rule); + if(! 
BOOST_TEST(rv.has_value())) + return; + BOOST_TEST(rv.value() == r); + /* every escaping backslash is part of the view but not of the unescaped value */ BOOST_TEST( + rv->unescaped_size() == rv->size() - escapes); + } + void + run() + { // token - bad(t, ""); - ok(t, "x"); - ok(t, + bad(""); + bad(" "); + bad("a b"); + ok("x", "x"); + ok( + "!#$%&'*+-.^_`|~" + "0123456789" + "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "!#$%&'*+-.^_`|~" "0123456789" "abcdefghijklmnopqrstuvwxyz" "ABCDEFGHIJKLMNOPQRSTUVWXYZ"); - bad(t, "a b"); // quoted-string - ok(t, "\"\""); - ok(t, "\"x\""); - ok(t, "\"\\,\""); - ok(t, "\"abc\\ def\""); + // the expected view excludes the surrounding DQUOTEs + // but still contains the backslashes + bad(R"(")"); + bad(R"("" )"); + bad(R"( "")"); + bad(R"(""")"); + bad(R"("\")"); + ok(R"("")", R"()"); + ok(R"("x")", R"(x)"); + ok(R"("\\")", R"(\\)", 1); + ok(R"("abc\ def")", R"(abc\ def)", 1); + ok(R"("a\"b\"c")", R"(a\"b\"c)", 2); } };