-
Notifications
You must be signed in to change notification settings - Fork 425
Introduced new chunked transfer encoding parser to remove chunk markers #720
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
673b853
c4d758a
e9bf5f7
9586a81
c70f774
ff5c7c8
192d520
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
…rs in streaming clients.
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,6 +11,7 @@ | |
|
||
#include <iterator> | ||
#include <cstdint> | ||
#include <iostream> | ||
#include <boost/algorithm/string/trim.hpp> | ||
#include <boost/asio/steady_timer.hpp> | ||
#include <boost/asio/placeholders.hpp> | ||
|
@@ -37,6 +38,77 @@ namespace network { | |
namespace http { | ||
namespace impl { | ||
|
||
template <class Tag> | ||
struct chunk_encoding_parser { | ||
|
||
chunk_encoding_parser() : state(state_t::header), chunk_size(0) {} | ||
|
||
enum state_t { header, header_end, data, data_end }; | ||
|
||
state_t state; | ||
size_t chunk_size; | ||
std::array<typename char_<Tag>::type, 1024> buffer; | ||
|
||
void update_chunk_size(boost::iterator_range<typename std::array<typename char_<Tag>::type, 1024>::const_iterator> const& range) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, this is a really long line. We prefer using |
||
if (range.empty()) | ||
return; | ||
std::stringstream ss; | ||
ss << std::hex << range; | ||
size_t size; | ||
ss >> size; | ||
chunk_size = (chunk_size << (range.size()*4)) + size; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you help me understand this math? If There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, but range must correspond to the number of digits and 1024 digits would be an unrealistically large number (it would not throw an error though cause there is no overflow checking). I replaced the '+' with a '|' to make it more clear that this expression is just about appending the new bits to the lower end (and shifting the existing bits by the number of the new bits). Every hex digit corresponds to a quadruple of bits, that's why i multiply the number of digits by four. I didn't want to introduce another char buffer to accumulate the hex digits, so I decided to incrementally compute the chunk size. |
||
} | ||
|
||
boost::iterator_range<typename std::array<typename char_<Tag>::type, 1024>::const_iterator> operator()(boost::iterator_range<typename std::array<typename char_<Tag>::type, 1024>::const_iterator> const& range) { | ||
auto iter = boost::begin(range); | ||
auto begin = iter; | ||
auto pos = boost::begin(buffer); | ||
|
||
while (iter != boost::end(range)) | ||
switch(state) { | ||
case state_t::header: | ||
iter = std::find(iter, boost::end(range), '\r'); | ||
update_chunk_size(boost::make_iterator_range(begin, iter)); | ||
if (iter != boost::end(range)) { | ||
state = state_t::header_end; | ||
++iter; | ||
} | ||
break; | ||
|
||
case state_t::header_end: | ||
BOOST_ASSERT(*iter == '\n'); | ||
++iter; | ||
state = state_t::data; | ||
break; | ||
|
||
case state_t::data: | ||
if (chunk_size == 0) { | ||
BOOST_ASSERT(*iter == '\r'); | ||
++iter; | ||
state = state_t::data_end; | ||
} else { | ||
auto len = std::min(chunk_size, (size_t)std::distance(iter, boost::end(range))); | ||
begin = iter; | ||
iter = std::next(iter, len); | ||
pos = std::copy(begin, iter, pos); | ||
chunk_size -= len; | ||
} | ||
break; | ||
|
||
case state_t::data_end: | ||
BOOST_ASSERT (*iter == '\n'); | ||
++iter; | ||
begin = iter; | ||
state = state_t::header; | ||
break; | ||
|
||
default: | ||
BOOST_ASSERT(false && "Bug, report this to the developers!"); | ||
} | ||
return boost::make_iterator_range(boost::begin(buffer), pos); | ||
} | ||
}; | ||
|
||
template <class Tag, unsigned version_major, unsigned version_minor> | ||
struct async_connection_base; | ||
|
||
|
@@ -72,8 +144,10 @@ struct http_async_connection | |
|
||
http_async_connection(resolver_type& resolver, resolve_function resolve, | ||
bool follow_redirect, int timeout, | ||
bool remove_chunk_markers, | ||
connection_delegate_ptr delegate) | ||
: timeout_(timeout), | ||
remove_chunk_markers_(remove_chunk_markers), | ||
timer_(resolver.get_io_service()), | ||
is_timedout_(false), | ||
follow_redirect_(follow_redirect), | ||
|
@@ -348,8 +422,11 @@ struct http_async_connection | |
|
||
// The invocation of the callback is synchronous to allow us to | ||
// wait before scheduling another read. | ||
callback(make_iterator_range(begin, end), ec); | ||
|
||
if (this->is_chunk_encoding && remove_chunk_markers_) { | ||
callback(parse_chunk_encoding(make_iterator_range(begin, end)), ec); | ||
} else { | ||
callback(make_iterator_range(begin, end), ec); | ||
} | ||
auto self = this->shared_from_this(); | ||
delegate_->read_some( | ||
boost::asio::mutable_buffers_1(this->part.data(), | ||
|
@@ -388,14 +465,28 @@ struct http_async_connection | |
// We call the callback function synchronously passing the error | ||
// condition (in this case, end of file) so that it can handle it | ||
// appropriately. | ||
callback(make_iterator_range(begin, end), ec); | ||
if (this->is_chunk_encoding && remove_chunk_markers_) { | ||
callback(parse_chunk_encoding(make_iterator_range(begin, end)), ec); | ||
} else { | ||
callback(make_iterator_range(begin, end), ec); | ||
} | ||
} else { | ||
string_type body_string; | ||
std::swap(body_string, this->partial_parsed); | ||
body_string.append(this->part.begin(), this->part.begin() + bytes_transferred); | ||
if (this->is_chunk_encoding) { | ||
this->body_promise.set_value(parse_chunk_encoding(body_string)); | ||
if (this->is_chunk_encoding && remove_chunk_markers_) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @umennel Careful -- this actually breaks previous behavior. Now, unless you set http::client client;
http::client::request request("http://www.boost.org:80/");
http::client::response response = client.get(request);
std::cerr << body(response) << std::endl; @deanberris Is this what we want? To put it another way, can you think of a case where we wouldn't want There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @igorpeshansky, indeed, the default should be true. Otherwise it is hard to keep the behavior consistent. We should decide on this before merging into 0.13. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Great, I'll send a PR shortly. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sent #830. |
||
for (size_t i = 0; i < this->partial_parsed.size(); i += 1024) { | ||
auto range = parse_chunk_encoding( | ||
boost::make_iterator_range(this->partial_parsed.data() + i, | ||
this->partial_parsed.data() + std::min(i+1024, this->partial_parsed.size()))); | ||
body_string.append(boost::begin(range), boost::end(range)); | ||
} | ||
this->partial_parsed.clear(); | ||
auto range = parse_chunk_encoding(boost::make_iterator_range(this->part.begin(), | ||
this->part.begin() + bytes_transferred)); | ||
body_string.append(boost::begin(range), boost::end(range)); | ||
this->body_promise.set_value(body_string); | ||
} else { | ||
std::swap(body_string, this->partial_parsed); | ||
body_string.append(this->part.begin(), this->part.begin() + bytes_transferred); | ||
this->body_promise.set_value(body_string); | ||
} | ||
} | ||
|
@@ -417,7 +508,11 @@ struct http_async_connection | |
this->part.begin(); | ||
typename protocol_base::buffer_type::const_iterator end = begin; | ||
std::advance(end, bytes_transferred); | ||
callback(make_iterator_range(begin, end), ec); | ||
if (this->is_chunk_encoding && remove_chunk_markers_) { | ||
callback(parse_chunk_encoding(make_iterator_range(begin, end)), ec); | ||
} else { | ||
callback(make_iterator_range(begin, end), ec); | ||
} | ||
auto self = this->shared_from_this(); | ||
delegate_->read_some( | ||
boost::asio::mutable_buffers_1(this->part.data(), | ||
|
@@ -476,38 +571,39 @@ struct http_async_connection | |
} | ||
} | ||
|
||
string_type parse_chunk_encoding(string_type& body_string) { | ||
string_type body; | ||
string_type crlf = "\r\n"; | ||
|
||
typename string_type::iterator begin = body_string.begin(); | ||
for (typename string_type::iterator iter = | ||
std::search(begin, body_string.end(), crlf.begin(), crlf.end()); | ||
iter != body_string.end(); | ||
iter = | ||
std::search(begin, body_string.end(), crlf.begin(), crlf.end())) { | ||
string_type line(begin, iter); | ||
if (line.empty()) { | ||
break; | ||
} | ||
std::stringstream stream(line); | ||
int len; | ||
stream >> std::hex >> len; | ||
std::advance(iter, 2); | ||
if (len == 0) { | ||
break; | ||
} | ||
if (len <= body_string.end() - iter) { | ||
body.insert(body.end(), iter, iter + len); | ||
std::advance(iter, len + 2); | ||
} | ||
begin = iter; | ||
} | ||
// string_type parse_chunk_encoding(string_type& body_string) { | ||
// string_type body; | ||
// string_type crlf = "\r\n"; | ||
|
||
return body; | ||
} | ||
// typename string_type::iterator begin = body_string.begin(); | ||
// for (typename string_type::iterator iter = | ||
// std::search(begin, body_string.end(), crlf.begin(), crlf.end()); | ||
// iter != body_string.end(); | ||
// iter = | ||
// std::search(begin, body_string.end(), crlf.begin(), crlf.end())) { | ||
// string_type line(begin, iter); | ||
// if (line.empty()) { | ||
// break; | ||
// } | ||
// std::stringstream stream(line); | ||
// int len; | ||
// stream >> std::hex >> len; | ||
// std::advance(iter, 2); | ||
// if (len == 0) { | ||
// break; | ||
// } | ||
// if (len <= body_string.end() - iter) { | ||
// body.insert(body.end(), iter, iter + len); | ||
// std::advance(iter, len + 2); | ||
// } | ||
// begin = iter; | ||
// } | ||
|
||
// return body; | ||
// } | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Consider just removing these lines? |
||
|
||
int timeout_; | ||
bool remove_chunk_markers_; | ||
boost::asio::steady_timer timer_; | ||
bool is_timedout_; | ||
bool follow_redirect_; | ||
|
@@ -517,6 +613,7 @@ struct http_async_connection | |
connection_delegate_ptr delegate_; | ||
boost::asio::streambuf command_streambuf; | ||
string_type method; | ||
chunk_encoding_parser<Tag> parse_chunk_encoding; | ||
}; | ||
|
||
} // namespace impl | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think you can use an
enum class
here instead.