Skip to content

Introduced new chunked transfer encoding parser to remove chunk markers #720

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Mar 31, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions boost/network/protocol/http/client/async_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,16 @@ struct async_client
typedef std::function<bool(string_type&)> body_generator_function_type;

async_client(bool cache_resolved, bool follow_redirect,
bool always_verify_peer, int timeout,
bool always_verify_peer, int timeout, bool remove_chunk_markers,
std::shared_ptr<boost::asio::io_service> service,
optional<string_type> certificate_filename,
optional<string_type> verify_path,
optional<string_type> certificate_file,
optional<string_type> private_key_file,
optional<string_type> ciphers,
optional<string_type> sni_hostname, long ssl_options)
: connection_base(cache_resolved, follow_redirect, timeout),
: connection_base(cache_resolved, follow_redirect, timeout,
remove_chunk_markers),
service_ptr(service.get() ? service
: std::make_shared<boost::asio::io_service>()),
service_(*service_ptr),
Expand Down
4 changes: 3 additions & 1 deletion boost/network/protocol/http/client/connection/async_base.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ struct async_connection_base {
static connection_ptr new_connection(
resolve_function resolve, resolver_type &resolver, bool follow_redirect,
bool always_verify_peer, bool https, int timeout,
bool remove_chunk_markers,
optional<string_type> certificate_filename = optional<string_type>(),
optional<string_type> const &verify_path = optional<string_type>(),
optional<string_type> certificate_file = optional<string_type>(),
Expand All @@ -59,7 +60,8 @@ struct async_connection_base {
certificate_filename, verify_path, certificate_file, private_key_file,
ciphers, sni_hostname, ssl_options);
auto temp = std::make_shared<async_connection>(
resolver, resolve, follow_redirect, timeout, std::move(delegate));
resolver, resolve, follow_redirect, timeout, remove_chunk_markers,
std::move(delegate));
BOOST_ASSERT(temp != nullptr);
return temp;
}
Expand Down
151 changes: 112 additions & 39 deletions boost/network/protocol/http/client/connection/async_normal.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,82 @@ namespace network {
namespace http {
namespace impl {

template <class Tag>
struct chunk_encoding_parser {
chunk_encoding_parser() : state(state_t::header), chunk_size(0) {}

enum class state_t { header, header_end, data, data_end };

state_t state;
size_t chunk_size;
std::array<typename char_<Tag>::type, 1024> buffer;

void update_chunk_size(
boost::iterator_range<typename std::array<
typename char_<Tag>::type, 1024>::const_iterator> const &range) {
if (range.empty()) return;
std::stringstream ss;
ss << std::hex << range;
size_t size;
ss >> size;
// New digits are appended as LSBs
chunk_size = (chunk_size << (range.size() * 4)) | size;
}

boost::iterator_range<
typename std::array<typename char_<Tag>::type, 1024>::const_iterator>
operator()(
boost::iterator_range<typename std::array<
typename char_<Tag>::type, 1024>::const_iterator> const &range) {
auto iter = boost::begin(range);
auto begin = iter;
auto pos = boost::begin(buffer);

while (iter != boost::end(range)) switch (state) {
case state_t::header:
iter = std::find(iter, boost::end(range), '\r');
update_chunk_size(boost::make_iterator_range(begin, iter));
if (iter != boost::end(range)) {
state = state_t::header_end;
++iter;
}
break;

case state_t::header_end:
BOOST_ASSERT(*iter == '\n');
++iter;
state = state_t::data;
break;

case state_t::data:
if (chunk_size == 0) {
BOOST_ASSERT(*iter == '\r');
++iter;
state = state_t::data_end;
} else {
auto len = std::min(chunk_size,
(size_t)std::distance(iter, boost::end(range)));
begin = iter;
iter = std::next(iter, len);
pos = std::copy(begin, iter, pos);
chunk_size -= len;
}
break;

case state_t::data_end:
BOOST_ASSERT(*iter == '\n');
++iter;
begin = iter;
state = state_t::header;
break;

default:
BOOST_ASSERT(false && "Bug, report this to the developers!");
}
return boost::make_iterator_range(boost::begin(buffer), pos);
}
};

template <class Tag, unsigned version_major, unsigned version_minor>
struct async_connection_base;

Expand Down Expand Up @@ -72,8 +148,10 @@ struct http_async_connection

http_async_connection(resolver_type& resolver, resolve_function resolve,
bool follow_redirect, int timeout,
bool remove_chunk_markers,
connection_delegate_ptr delegate)
: timeout_(timeout),
remove_chunk_markers_(remove_chunk_markers),
timer_(resolver.get_io_service()),
is_timedout_(false),
follow_redirect_(follow_redirect),
Expand Down Expand Up @@ -348,8 +426,11 @@ struct http_async_connection

// The invocation of the callback is synchronous to allow us to
// wait before scheduling another read.
callback(make_iterator_range(begin, end), ec);

if (this->is_chunk_encoding && remove_chunk_markers_) {
callback(parse_chunk_encoding(make_iterator_range(begin, end)), ec);
} else {
callback(make_iterator_range(begin, end), ec);
}
auto self = this->shared_from_this();
delegate_->read_some(
boost::asio::mutable_buffers_1(this->part.data(),
Expand Down Expand Up @@ -388,14 +469,31 @@ struct http_async_connection
// We call the callback function synchronously passing the error
// condition (in this case, end of file) so that it can handle it
// appropriately.
callback(make_iterator_range(begin, end), ec);
if (this->is_chunk_encoding && remove_chunk_markers_) {
callback(parse_chunk_encoding(make_iterator_range(begin, end)), ec);
} else {
callback(make_iterator_range(begin, end), ec);
}
} else {
string_type body_string;
std::swap(body_string, this->partial_parsed);
body_string.append(this->part.begin(), this->part.begin() + bytes_transferred);
if (this->is_chunk_encoding) {
this->body_promise.set_value(parse_chunk_encoding(body_string));
if (this->is_chunk_encoding && remove_chunk_markers_) {
Copy link

@igorpeshansky igorpeshansky Mar 7, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@umennel Careful -- this actually breaks previous behavior. Now, unless you set remove_chunk_markers to true, the following code will produce a chunked-encoded string:

http::client client;
http::client::request request("http://www.boost.org:80/");
http::client::response response = client.get(request);
std::cerr << body(response) << std::endl;

@deanberris Is this what we want?

To put it another way, can you think of a case where we wouldn't want remove_chunk_markers to default to true?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@igorpeshansky, indeed, the default should be true. Otherwise it is hard to keep the behavior consistent. We should decide on this before merging into 0.13.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great, I'll send a PR shortly.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sent #830.

for (size_t i = 0; i < this->partial_parsed.size(); i += 1024) {
auto range = parse_chunk_encoding(boost::make_iterator_range(
this->partial_parsed.data() + i,
this->partial_parsed.data() +
std::min(i + 1024, this->partial_parsed.size())));
body_string.append(boost::begin(range), boost::end(range));
}
this->partial_parsed.clear();
auto range = parse_chunk_encoding(boost::make_iterator_range(
this->part.begin(),
this->part.begin() + bytes_transferred));
body_string.append(boost::begin(range), boost::end(range));
this->body_promise.set_value(body_string);
} else {
std::swap(body_string, this->partial_parsed);
body_string.append(this->part.begin(),
this->part.begin() + bytes_transferred);
this->body_promise.set_value(body_string);
}
}
Expand All @@ -417,7 +515,11 @@ struct http_async_connection
this->part.begin();
typename protocol_base::buffer_type::const_iterator end = begin;
std::advance(end, bytes_transferred);
callback(make_iterator_range(begin, end), ec);
if (this->is_chunk_encoding && remove_chunk_markers_) {
callback(parse_chunk_encoding(make_iterator_range(begin, end)), ec);
} else {
callback(make_iterator_range(begin, end), ec);
}
auto self = this->shared_from_this();
delegate_->read_some(
boost::asio::mutable_buffers_1(this->part.data(),
Expand Down Expand Up @@ -476,38 +578,8 @@ struct http_async_connection
}
}

string_type parse_chunk_encoding(string_type& body_string) {
string_type body;
string_type crlf = "\r\n";

typename string_type::iterator begin = body_string.begin();
for (typename string_type::iterator iter =
std::search(begin, body_string.end(), crlf.begin(), crlf.end());
iter != body_string.end();
iter =
std::search(begin, body_string.end(), crlf.begin(), crlf.end())) {
string_type line(begin, iter);
if (line.empty()) {
break;
}
std::stringstream stream(line);
int len;
stream >> std::hex >> len;
std::advance(iter, 2);
if (len == 0) {
break;
}
if (len <= body_string.end() - iter) {
body.insert(body.end(), iter, iter + len);
std::advance(iter, len + 2);
}
begin = iter;
}

return body;
}

int timeout_;
bool remove_chunk_markers_;
boost::asio::steady_timer timer_;
bool is_timedout_;
bool follow_redirect_;
Expand All @@ -517,6 +589,7 @@ struct http_async_connection
connection_delegate_ptr delegate_;
boost::asio::streambuf command_streambuf;
string_type method;
chunk_encoding_parser<Tag> parse_chunk_encoding;
};

} // namespace impl
Expand Down
3 changes: 2 additions & 1 deletion boost/network/protocol/http/client/facade.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,8 @@ class basic_client_facade {
options.openssl_verify_path(), options.openssl_certificate_file(),
options.openssl_private_key_file(), options.openssl_ciphers(),
options.openssl_sni_hostname(), options.openssl_options(),
options.io_service(), options.timeout()));
options.io_service(), options.timeout(),
options.remove_chunk_markers()));
}
};

Expand Down
16 changes: 14 additions & 2 deletions boost/network/protocol/http/client/options.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ class client_options {
openssl_options_(0),
io_service_(),
always_verify_peer_(true),
timeout_(0) {}
timeout_(0),
remove_chunk_markers_(false) {}

client_options(client_options const& other)
: cache_resolved_(other.cache_resolved_),
Expand All @@ -48,7 +49,8 @@ class client_options {
openssl_options_(other.openssl_options_),
io_service_(other.io_service_),
always_verify_peer_(other.always_verify_peer_),
timeout_(other.timeout_) {}
timeout_(other.timeout_),
remove_chunk_markers_(other.remove_chunk_markers) {}

client_options& operator=(client_options other) {
other.swap(*this);
Expand All @@ -69,6 +71,7 @@ class client_options {
swap(io_service_, other.io_service_);
swap(always_verify_peer_, other.always_verify_peer_);
swap(timeout_, other.timeout_);
swap(remove_chunk_markers_, other.remove_chunk_markers_);
}

/// Specify whether the client should cache resolved endpoints.
Expand Down Expand Up @@ -154,6 +157,12 @@ class client_options {
return *this;
}

/// Set an overall timeout for HTTP requests.
client_options& remove_chunk_markers(bool v) {
remove_chunk_markers_ = v;
return *this;
}

bool cache_resolved() const { return cache_resolved_; }

bool follow_redirects() const { return follow_redirects_; }
Expand Down Expand Up @@ -190,6 +199,8 @@ class client_options {

int timeout() const { return timeout_; }

bool remove_chunk_markers() const { return remove_chunk_markers_; }

private:
bool cache_resolved_;
bool follow_redirects_;
Expand All @@ -203,6 +214,7 @@ class client_options {
std::shared_ptr<boost::asio::io_service> io_service_;
bool always_verify_peer_;
int timeout_;
bool remove_chunk_markers_;
};

template <class Tag>
Expand Down
10 changes: 6 additions & 4 deletions boost/network/protocol/http/client/pimpl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,12 @@ struct basic_client_impl
optional<string_type> const& private_key_file,
optional<string_type> const& ciphers,
optional<string_type> const& sni_hostname, long ssl_options,
std::shared_ptr<boost::asio::io_service> service, int timeout)
: base_type(cache_resolved, follow_redirect, always_verify_peer, timeout,
service, certificate_filename, verify_path, certificate_file,
private_key_file, ciphers, sni_hostname, ssl_options) {}
std::shared_ptr<boost::asio::io_service> service, int timeout,
bool remove_chunk_markers)
: base_type(cache_resolved, follow_redirect, always_verify_peer, timeout,
remove_chunk_markers, service, certificate_filename, verify_path,
certificate_file, private_key_file, ciphers, sni_hostname,
ssl_options) {}

~basic_client_impl() = default;
};
Expand Down
18 changes: 10 additions & 8 deletions boost/network/protocol/http/policies/async_connection.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ struct async_connection_policy : resolver_policy<Tag>::type {
struct connection_impl {
connection_impl(
bool follow_redirect, bool always_verify_peer, resolve_function resolve,
resolver_type& resolver, bool https, int timeout,
resolver_type& resolver, bool https, int timeout, bool remove_chunk_markers,
optional<string_type> /*unused*/ const& certificate_filename,
optional<string_type> const& verify_path,
optional<string_type> const& certificate_file,
Expand All @@ -47,9 +47,9 @@ struct async_connection_policy : resolver_policy<Tag>::type {
optional<string_type> const& sni_hostname, long ssl_options) {
pimpl = impl::async_connection_base<Tag, version_major, version_minor>::
new_connection(resolve, resolver, follow_redirect, always_verify_peer,
https, timeout, certificate_filename, verify_path,
certificate_file, private_key_file, ciphers,
sni_hostname, ssl_options);
https, timeout, remove_chunk_markers,
certificate_filename, verify_path, certificate_file,
private_key_file, ciphers, sni_hostname, ssl_options);
}

basic_response<Tag> send_request(string_type /*unused*/ const& method,
Expand Down Expand Up @@ -86,20 +86,22 @@ struct async_connection_policy : resolver_policy<Tag>::type {
this->resolve(resolver, host, port, once_resolved);
},
resolver, boost::iequals(protocol_, string_type("https")), timeout_,
certificate_filename, verify_path, certificate_file, private_key_file,
ciphers, sni_hostname, ssl_options);
remove_chunk_markers_, certificate_filename, verify_path,
certificate_file, private_key_file, ciphers, sni_hostname, ssl_options);
}

void cleanup() {}

async_connection_policy(bool cache_resolved, bool follow_redirect,
int timeout)
int timeout, bool remove_chunk_markers)
: resolver_base(cache_resolved),
follow_redirect_(follow_redirect),
timeout_(timeout) {}
timeout_(timeout),
remove_chunk_markers_(remove_chunk_markers) {}

bool follow_redirect_;
int timeout_;
bool remove_chunk_markers_;
};

} // namespace http
Expand Down
9 changes: 6 additions & 3 deletions libs/network/test/http/client_get_different_port_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,12 @@ namespace http = boost::network::http;
TYPED_TEST_CASE(HTTPClientTest, ClientTypes);

TYPED_TEST(HTTPClientTest, GetDifferentPort) {
TypeParam client;
typename TypeParam::request r("http://www.boost.org:80/");
auto response_ = client.get(r);
using client = TypeParam;
typename client::options options;
options.remove_chunk_markers(true);
client client_;
typename TypeParam::request request("http://www.boost.org:80/");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know this might be out-of-scope, but could you make this point to cpp-netlib.org instead? That would be great, thanks. :)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately cpp-netlib.org does not yield chunk encoded content. It does not matter much for this particular test, but for the streaming test we'd have all scenarios covered with www.boost.org: it yields chunk encoded content for http/1.1 clients and non encoded content for http/1.0 clients.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, good point, thanks.

auto response_ = client_.get(request);
auto range = headers(response_)["Content-Type"];
EXPECT_TRUE(std::begin(range) != std::end(range));
EXPECT_NE(0, body(response_).size());
Expand Down
Loading