Skip to content

Commit 1b71524

Browse files
author
Dean Michael Berris
committed
TDD-made WIP on HTTP Response Incremental Parsing
This initial implementation documents the development of an incremental parser for HTTP responses. It is a completely ground-up implementation that is being developed in a test-driven manner. The aim of this effort is two-fold:
* Develop a functional incremental parser for HTTP response messages, to be used in the asynchronous HTTP client implementation.
* Derive the minimal interface for incremental parsers, and provide a foundation on which to build concepts around incremental parsing for a potential library of these parsers.
To be truly generic, the approach is to develop a specific implementation of an incremental parser (in this case for HTTP) and then see whether it can be applied to other protocols (e.g. XMPP, SMTP).
1 parent 0cc40de commit 1b71524

File tree

3 files changed

+263
-1
lines changed

3 files changed

+263
-1
lines changed
Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
#ifndef BOOST_NETWORK_PROTOCOL_HTTP_PARSER_INCREMENTAL_HPP_20100909
2+
#define BOOST_NETWORK_PROTOCOL_HTTP_PARSER_INCREMENTAL_HPP_20100909
3+
4+
// Copyright Dean Michael Berris 2010.
5+
// Distributed under the Boost Software License, Version 1.0.
6+
// (See accompanying file LICENSE_1_0.txt or copy at
7+
// http://www.boost.org/LICENSE_1_0.txt)
8+
9+
#include <boost/network/tags.hpp>
10+
#include <boost/range.hpp>
11+
#include <boost/fusion/tuple.hpp>
12+
#include <boost/network/traits/string.hpp>
13+
#include <boost/logic/tribool.hpp>
14+
#include <utility>
15+
16+
namespace boost { namespace network { namespace http {
17+
18+
template <class Tag>
19+
struct response_parser {
20+
21+
enum state_t {
22+
http_response_begin,
23+
http_version_h,
24+
http_version_t1,
25+
http_version_t2,
26+
http_version_p,
27+
http_version_slash,
28+
http_version_major,
29+
http_version_dot,
30+
http_version_minor,
31+
http_version_done
32+
};
33+
34+
typedef typename string<Tag>::type::const_iterator iterator_type;
35+
typedef iterator_range<iterator_type> range_type;
36+
37+
response_parser ()
38+
: state_(http_response_begin) {}
39+
response_parser (response_parser const & other)
40+
: state_(other.state_) {}
41+
~response_parser () {}
42+
43+
void swap(response_parser & other) {
44+
std::swap(other.state_, this->state_);
45+
}
46+
47+
response_parser & operator=(response_parser rhs) {
48+
rhs.swap(*this);
49+
return *this;
50+
}
51+
52+
template <class Range>
53+
fusion::tuple<logic::tribool,range_type> parse_until(state_t stop_state, Range & range_) {
54+
logic::tribool parsed_ok(logic::indeterminate);
55+
iterator_type start = boost::begin(range_),
56+
current = start,
57+
end = boost::end(range_);
58+
range_type local_range = boost::make_iterator_range(start, end);
59+
while (!boost::empty(local_range) && indeterminate(parsed_ok)) {
60+
current = boost::begin(local_range);
61+
if (state_ == stop_state) {
62+
parsed_ok = true;
63+
} else {
64+
switch(state_) {
65+
case http_response_begin:
66+
if (*current == ' ' || *current == '\r' || *current == '\n') {
67+
// skip valid leading whitespace
68+
++start;
69+
++current;
70+
} else if (*current == 'H') {
71+
state_ = http_version_h;
72+
start = current;
73+
++current;
74+
} else {
75+
parsed_ok = false;
76+
}
77+
break;
78+
case http_version_h:
79+
if (*current == 'T') {
80+
state_ = http_version_t1;
81+
++current;
82+
} else {
83+
parsed_ok = false;
84+
}
85+
break;
86+
case http_version_t1:
87+
if (*current == 'T') {
88+
state_ = http_version_t2;
89+
++current;
90+
} else {
91+
parsed_ok = false;
92+
}
93+
break;
94+
case http_version_t2:
95+
if (*current == 'P') {
96+
state_ = http_version_p;
97+
++current;
98+
} else {
99+
parsed_ok = false;
100+
}
101+
break;
102+
case http_version_p:
103+
if (*current == '/') {
104+
state_ = http_version_slash;
105+
++current;
106+
} else {
107+
parsed_ok = false;
108+
}
109+
break;
110+
case http_version_slash:
111+
if (*current == '1') {
112+
state_ = http_version_major;
113+
++current;
114+
} else {
115+
parsed_ok = false;
116+
}
117+
break;
118+
case http_version_major:
119+
if (*current == '.') {
120+
state_ = http_version_dot;
121+
++current;
122+
} else {
123+
parsed_ok = false;
124+
}
125+
break;
126+
case http_version_dot:
127+
if (*current == '1' || *current == '0') {
128+
state_ = http_version_minor;
129+
++current;
130+
} else {
131+
parsed_ok = false;
132+
}
133+
break;
134+
case http_version_minor:
135+
if (*current == ' ') {
136+
state_ = http_version_done;
137+
++current;
138+
} else {
139+
parsed_ok = false;
140+
}
141+
break;
142+
default:
143+
parsed_ok = false;
144+
}
145+
}
146+
147+
local_range = boost::make_iterator_range(current, end);
148+
}
149+
if (state_ == stop_state) parsed_ok = true;
150+
return fusion::make_tuple(parsed_ok,boost::make_iterator_range(start, current));
151+
}
152+
153+
state_t state() {
154+
return state_;
155+
}
156+
157+
void reset(state_t new_state = http_response_begin) {
158+
state_ = new_state;
159+
}
160+
161+
private:
162+
state_t state_;
163+
164+
};
165+
166+
167+
} /* http */
168+
169+
} /* network */
170+
171+
} /* boost */
172+
173+
#endif

libs/network/test/CMakeLists.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ set(Boost_USE_STATIC_LIBS ON)
1515
set(Boost_USE_MULTITHREADED ON)
1616

1717
if (Boost_FOUND)
18+
add_executable(cpp-netlib-http_incremental_parser http_incremental_parser.cpp)
1819
add_executable(cpp-netlib-hello_world hello_world.cpp)
1920
add_executable(cpp-netlib-http_1_0_test http_1_0_test.cpp)
2021
add_executable(cpp-netlib-http_1_1_test http_1_1_test.cpp)
@@ -24,6 +25,7 @@ if (Boost_FOUND)
2425
add_executable(cpp-netlib-http_message_test http_message_test.cpp)
2526
add_executable(cpp-netlib-message_transform_test message_transform_test.cpp)
2627
add_executable(cpp-netlib-url_test url_test.cpp)
28+
target_link_libraries(cpp-netlib-http_incremental_parser ${CMAKE_THREAD_LIBS_INIT} ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY})
2729
target_link_libraries(cpp-netlib-hello_world ${Boost_SYSTEM_LIBRARY} ${Boost_REGEX_LIBRARY} ${Boost_DATE_TIME_LIBRARY} ${Boost_THREAD_LIBRARY} ${CMAKE_THREAD_LIBS_INIT})
2830
target_link_libraries(cpp-netlib-http_1_0_test ${Boost_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
2931
target_link_libraries(cpp-netlib-http_1_1_test ${Boost_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
@@ -44,7 +46,8 @@ if (Boost_FOUND)
4446
target_link_libraries(cpp-netlib-https_localhost_tests ${OPENSSL_LIBRARIES} )
4547
target_link_libraries(cpp-netlib-url_test ${OPENSSL_LIBRARIES} )
4648
endif()
47-
set_target_properties(cpp-netlib-hello_world cpp-netlib-http_1_0_test cpp-netlib-http_1_1_test cpp-netlib-message_test cpp-netlib-http_message_test cpp-netlib-message_transform_test cpp-netlib-http_localhost_tests cpp-netlib-https_localhost_tests cpp-netlib-url_test PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../../../build/tests)
49+
set_target_properties(cpp-netlib-http_incremental_parser cpp-netlib-hello_world cpp-netlib-http_1_0_test cpp-netlib-http_1_1_test cpp-netlib-message_test cpp-netlib-http_message_test cpp-netlib-message_transform_test cpp-netlib-http_localhost_tests cpp-netlib-https_localhost_tests cpp-netlib-url_test PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../../../build/tests)
50+
add_test(cpp-netlib-http_incremental_parser ../../../build/tests/cpp-netlib-http_incremental_parser)
4851
add_test(cpp-netlib-hello_world python httplib_acceptance.py ../../../build/tests/cpp-netlib-hello_world ../../../build/tests/cpp-netlib-hello_world.passed)
4952
add_test(cpp-netlib-http_1_0_test ../../../build/tests/cpp-netlib-http_1_0_test)
5053
add_test(cpp-netlib-http_1_1_test ../../../build/tests/cpp-netlib-http_1_1_test)
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
2+
// Copyright Dean Michael Berris 2010.
3+
// Distributed under the Boost Software License, Version 1.0.
4+
// (See accompanying file LICENSE_1_0.txt or copy at
5+
// http://www.boost.org/LICENSE_1_0.txt)
6+
7+
#define BOOST_TEST_MODULE HTTP Incremental Parser Test
8+
#include <boost/config/warning_disable.hpp>
9+
#include <boost/test/unit_test.hpp>
10+
#include <boost/network/protocol/http/parser/incremental.hpp>
11+
#include <boost/range.hpp>
12+
#include <boost/logic/tribool.hpp>
13+
#include <string>
14+
#include <iostream>
15+
16+
/** Synopsis
17+
*
18+
* Test for HTTP Response Incremental Parser
19+
* -----------------------------------------
20+
*
21+
* In this test we fully intend to specify how an incremental
22+
* HTTP Response parser should be used. This defines the bare
23+
* minimum implementation for an Incremental Parser concept,
24+
* and shall follow an interface that puts a premium on simplicity.
25+
*
26+
* The motivation for coming up with a re-startable stateful
27+
* incremental parser comes from the requirement in the asynchronous
28+
* HTTP client implementation that allows for parsing an HTTP
29+
* response as the data comes in. By being able to process some
30+
* parts of the message ahead of others, we are allowed to set
31+
* the promise values bound to futures that the users of the client
32+
* would be waiting on.
33+
*
34+
* The basic interface that we're looking for is a means of providing:
35+
* - a range of input
36+
* - a completion function once a certain state is reached
37+
* - a means of resetting the parser's state
38+
*
39+
* One of the possible implementations can use the Boost.MSM library
40+
* to create the state machine. The test however does not specify what
41+
* implementation should be used, but rather that the interface and the
42+
* semantics are according to expectations.
43+
*
44+
* Date: September 9, 2010
45+
* Author: Dean Michael Berris <mikhailberis@gmail.com>
46+
*/
47+
48+
namespace tags = boost::network::tags;
49+
namespace logic = boost::logic;
50+
namespace fusion = boost::fusion;
51+
using namespace boost::network::http;
52+
53+
/** The parser must be default constructible. */
BOOST_AUTO_TEST_CASE(incremental_parser_constructor) {
    typedef response_parser<tags::default_string> parser_type;
    parser_type parser; // compiles only if a default constructor is available
    (void)parser;       // the object is intentionally unused
}
56+
57+
/** In this test we want to be able to parse incrementally a
58+
* range passed in as input, and specify to the parser that
59+
* it should stop when we reach a certain state. In this case
60+
* we want it to parse until it either finds the HTTP version
61+
* or there is an error encountered.
62+
*/
63+
BOOST_AUTO_TEST_CASE(incremental_parser_parse_http_version) {
64+
response_parser<tags::default_string> p; // default constructible
65+
logic::tribool parsed_ok = false;
66+
typedef response_parser<tags::default_string>::range_type range_type;
67+
range_type result_range;
68+
69+
std::string valid_http_version = "HTTP/1.0 ";
70+
fusion::tie(parsed_ok, result_range) = p.parse_until(
71+
response_parser<tags::default_string>::http_version_done,
72+
valid_http_version);
73+
BOOST_CHECK_EQUAL(parsed_ok, true);
74+
BOOST_CHECK(!boost::empty(result_range));
75+
std::string parsed(boost::begin(result_range), boost::end(result_range));
76+
std::cout << "PARSED: " << parsed << " state=" << p.state() << std::endl;
77+
p.reset();
78+
valid_http_version = "HTTP/1.1 ";
79+
fusion::tie(parsed_ok, result_range) = p.parse_until(
80+
response_parser<tags::default_string>::http_version_done,
81+
valid_http_version);
82+
BOOST_CHECK_EQUAL(parsed_ok, true);
83+
BOOST_CHECK(!boost::empty(result_range));
84+
parsed = std::string(boost::begin(result_range), boost::end(result_range));
85+
std::cout << "PARSED: " << parsed << " state=" << p.state() << std::endl;
86+
}

0 commit comments

Comments
 (0)