recode URIParser

This commit is contained in:
wzl12356
2021-09-08 09:54:18 +08:00
parent 8cca0fcd11
commit 5d349f8e0f
4 changed files with 462 additions and 375 deletions

View File

@@ -14,6 +14,7 @@
limitations under the License.
Authors: Wu Jiaxu (wujiaxu@sogou-inc.com)
Wang Zhulei (wangzhulei@sogou-inc.com)
*/
#include <string.h>
@@ -22,131 +23,22 @@
#include <utility>
#include "StringUtil.h"
#include "URIParser.h"
/*
static bool is_unreserved[256];
static bool is_sub_delims[256];
static bool is_pchar[256];
static constexpr char sep[4] = {':', '/', '?', '#'};
static bool valid_char[7][256];
class __Init
enum
{
public:
__Init()
{
is_unreserved[(unsigned char)'-'] =
is_unreserved[(unsigned char)'.'] =
is_unreserved[(unsigned char)'_'] =
is_unreserved[(unsigned char)'~'] = true;
for (int i = 0; i < 256; i++)
{
if (isalnum(i))
is_unreserved[i] = true;
}
is_sub_delims[(unsigned char)'!'] =
is_sub_delims[(unsigned char)'$'] =
is_sub_delims[(unsigned char)'&'] =
is_sub_delims[(unsigned char)'\''] =
is_sub_delims[(unsigned char)'('] =
is_sub_delims[(unsigned char)')'] =
is_sub_delims[(unsigned char)'*'] =
is_sub_delims[(unsigned char)'+'] =
is_sub_delims[(unsigned char)','] =
is_sub_delims[(unsigned char)';'] =
is_sub_delims[(unsigned char)'='] = true;
is_pchar[(unsigned char)'%'] =
is_pchar[(unsigned char)':'] =
is_pchar[(unsigned char)'@'] = true;
for (int i = 0; i < 256; i++)
{
if (is_unreserved[i] || is_sub_delims[i])
is_pchar[i] = true;
}
for (int i = 0; i < 7; i++)
{
bool *arr = valid_char[i];
switch (i)
{
case 0://scheme
arr[(unsigned char)'+'] =
arr[(unsigned char)'-'] =
arr[(unsigned char)'.'] = true;
for (int i = 0; i < 256; i++)
{
if (isalnum(i))
arr[i] = true;
}
break;
case 1://userinfo
arr[(unsigned char)':'] =
arr[(unsigned char)'%'] = true;
for (int i = 0; i < 256; i++)
{
if (is_unreserved[i] || is_sub_delims[i])
arr[i] = true;
}
break;
case 2://host
arr[(unsigned char)'%'] = true;
for (int i = 0; i < 256; i++)
{
if (is_unreserved[i] || is_sub_delims[i])
arr[i] = true;
}
break;
case 3://port
for (int i = 0; i < 256; i++)
{
if (isdigit(i))
arr[i] = true;
}
break;
case 4://path
arr[(unsigned char)'/'] = true;
for (int i = 0; i < 256; i++)
{
if (is_pchar[i])
arr[i] = true;
}
break;
case 5://query
case 6://fragment
arr[(unsigned char)'/'] =
arr[(unsigned char)'?'] = true;
for (int i = 0; i < 256; i++)
{
if (is_pchar[i])
arr[i] = true;
}
break;
default:
break;
}
}
}
URI_SCHEME,
URI_USERINFO,
URI_HOST,
URI_PORT,
URI_QUERY,
URI_FRAGMENT,
URI_PATH,
URI_PART_ELEMENTS,
};
static __Init g_init;
*/
static constexpr char sep[4] = {':', '/', '?', '#'};
//scheme://[userinfo@]host[:port][/path][?query][#fragment]
//0-6 (scheme, userinfo, host, port, query, fragment, path)
static constexpr bool valid_char[7][256] = {
static constexpr unsigned char valid_char[URI_PART_ELEMENTS][256] = {
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -189,7 +81,7 @@ static constexpr bool valid_char[7][256] = {
0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -219,7 +111,6 @@ static constexpr bool valid_char[7][256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
},
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -239,87 +130,80 @@ static constexpr bool valid_char[7][256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
},
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
},
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
}
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
},
};
void ParsedURI::deinit()
{
if (scheme)
{
free(scheme);
scheme = NULL;
}
// Delimiter lookup used by URIParser::parse() while it scans the part of the
// URI that follows "scheme://". The table is indexed by the byte value of the
// current character; a non-zero entry selects the switch case parse() takes:
//   '#' -> URI_FRAGMENT   '/' -> URI_PATH   ':' -> URI_HOST
//   '?' -> URI_QUERY      '@' -> URI_USERINFO
// Every other byte maps to 0 and is handled by the default (validation) case.
// The table is read-only, so it is constexpr like valid_char.
static constexpr unsigned char authority_map[256] = {
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, URI_FRAGMENT, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, URI_PATH,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, URI_HOST, 0, 0, 0, 0, URI_QUERY,
	URI_USERINFO, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
if (host)
{
free(host);
host = NULL;
}
if (path)
{
free(path);
path = NULL;
}
if (userinfo)
{
free(userinfo);
userinfo = NULL;
}
if (port)
{
free(port);
port = NULL;
}
if (query)
{
free(query);
query = NULL;
}
if (fragment)
{
free(fragment);
fragment = NULL;
}
}
// Delimiter lookup used by URIParser::parse() while it scans a URI whose
// scheme is not followed by "//" (the path starts right after "scheme:").
// Indexed by the byte value of the current character:
//   '#' -> URI_FRAGMENT   '?' -> URI_QUERY
// Every other byte maps to 0, i.e. it stays part of the current component.
// The table is read-only, so it is constexpr like valid_char.
static constexpr unsigned char path_map[256] = {
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, URI_FRAGMENT, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, URI_QUERY,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
void ParsedURI::__copy(const ParsedURI& copy)
{
@@ -394,148 +278,140 @@ void ParsedURI::__copy(const ParsedURI& copy)
}
}
// Takes over every component pointer and the parse status of `move`, then
// calls move.init() so the source no longer refers to the transferred
// strings. Nothing previously held by this object is released here; callers
// that need that must call deinit() first.
void ParsedURI::__move(ParsedURI&& move)
{
	// Plain status values.
	this->state = move.state;
	this->error = move.error;

	// Component strings: ownership is transferred, nothing is duplicated.
	this->scheme = move.scheme;
	this->userinfo = move.userinfo;
	this->host = move.host;
	this->port = move.port;
	this->path = move.path;
	this->query = move.query;
	this->fragment = move.fragment;

	// Detach the source from the strings it just gave away.
	move.init();
}
int URIParser::parse(const char *str, ParsedURI& uri)
{
uri.state = URI_STATE_INVALID;
if (!str[0])
return -1;//uri empty
if (!isalpha((unsigned char)str[0]))
return -1;//uri first char must be alpha
int start_idx[URI_PART_ELEMENTS] = {0};
int end_idx[URI_PART_ELEMENTS] = {0};
int state = URI_SCHEME;
int pre_state = URI_SCHEME;;
int i;
bool in_ipv6 = false;
int st[7] = {0};
int ed[7] = {0};
int cur = 1;
while (valid_char[0][(unsigned char)str[cur]])
cur++;
ed[0] = cur;
if (str[cur] && str[cur + 1] && str[cur + 2]
&& str[cur] == ':' && str[cur + 1] =='/' && str[cur + 2] =='/')
cur += 3;
else
return -1;//not match with ://
int last = cur;
int idx = 0;
bool is_ipv6 = false;
for (int i = cur; str[i]; i++)
for (i = 0; str[i]; i++)
{
if (str[i] == '/' || str[i] == '?' || str[i] == '#')
if (str[i] == ':')
{
// break on end of host
st[1] = cur;
ed[1] = cur;
break;
}
if (str[i] == '@')
{
st[1] = cur;
ed[1] = i;
cur = i + 1;
end_idx[URI_SCHEME] = i++;
break;
}
}
if (str[cur] == '[')
if (end_idx[URI_SCHEME] == 0)
return -1;
if (str[i] == '/' && str[i + 1] == '/')
{
st[2] = ++cur;
while (str[cur] && str[cur] != ']')
cur++;
pre_state = URI_HOST;
i += 2;
if (str[i] == '[')
in_ipv6= true;
else
start_idx[URI_USERINFO] = i;
if (str[cur] != ']')
return -1;
start_idx[URI_HOST] = i;
}
else
{
pre_state = URI_PATH;
start_idx[URI_PATH] = i;
}
is_ipv6 = true;
ed[2] = cur++;
if (str[cur])
bool skip_path = false;
if (start_idx[URI_PATH] == 0)
{
for (; str[i]; i++)
{
bool is_sep = false;
for (int i = 0; i < 4; i++)
state = authority_map[(unsigned char)str[i]];
switch (state)
{
if (str[cur] == sep[i])
{
is_sep = true;
case URI_USERINFO:
if (str[i + 1] == '[')
in_ipv6 = true;
end_idx[URI_USERINFO] = i;
start_idx[URI_HOST] = i + 1;
pre_state = URI_HOST;
break;
}
}
if (!is_sep)
return -1;
}
}
else
last = cur;
case URI_HOST:
if (str[i - 1] == ']')
in_ipv6 = false;
//userinfo@host:port/path?query#fragment
//host[:port][/path][?query][#fragment]
for (; str[cur]; cur++)
{
if (idx < 4)
{
for (int i = idx; i < 4; i++)
{
if (str[cur] == sep[i])
{
if (is_ipv6)
is_ipv6 = false;
else
if (!in_ipv6)
{
st[idx + 2] = last;
ed[idx + 2] = cur;
end_idx[URI_HOST] = i;
start_idx[URI_PORT] = i + 1;
pre_state = URI_PORT;
}
idx = i + 1;
if (sep[i] == '/')
last = cur;
else
last = cur + 1;
break;
}
case URI_QUERY:
end_idx[pre_state] = i;
start_idx[URI_QUERY] = i + 1;
pre_state = URI_QUERY;
skip_path = true;
break;
case URI_FRAGMENT:
end_idx[pre_state] = i;
start_idx[URI_FRAGMENT] = i + 1;
end_idx[URI_FRAGMENT] = i + strlen(str + i);
pre_state = URI_PART_ELEMENTS;
skip_path = true;
break;
case URI_PATH:
start_idx[URI_PATH] = i;
break;
default:
if (pre_state != URI_QUERY &&
!valid_char[pre_state][(unsigned char)str[i]])
return -1;//invalid char
}
if (state >= URI_FRAGMENT)
break;
}
}
if (pre_state != URI_PART_ELEMENTS)
end_idx[pre_state] = i;
if (cur > last && !is_ipv6)
if (!skip_path)
{
st[idx + 2] = last;
ed[idx + 2] = cur;
}
//check valid, skip scheme because of already checked
for (int i = 1; i < 5; i++)
{
for (int j = st[i]; j < ed[i]; j++)
if (!valid_char[i][(unsigned char)str[j]])
return -1;//invalid char
}
char **dst[7] = {&uri.scheme, &uri.userinfo, &uri.host, &uri.port,
&uri.path, &uri.query, &uri.fragment};
for (int i = 0; i < 7; i++)
{
if (ed[i] > st[i])
bool has_query = false;
pre_state = URI_PATH;
for (; str[i]; i++)
{
size_t len = ed[i] - st[i];
state = path_map[(unsigned char)str[i]];
if (state == URI_QUERY && !has_query)
{
has_query = true;
end_idx[URI_PATH] = i;
start_idx[URI_QUERY] = i + 1;
pre_state = URI_QUERY;
}
else if (state == URI_FRAGMENT)
{
end_idx[pre_state] = i;
start_idx[URI_FRAGMENT] = i + 1;
pre_state = URI_FRAGMENT;
break;
}
}
end_idx[pre_state] = i + strlen(str + i);
}
char **dst[URI_PART_ELEMENTS] = {&uri.scheme, &uri.userinfo, &uri.host, &uri.port,
&uri.query, &uri.fragment, &uri.path};
for (int i = 0; i < URI_PART_ELEMENTS; i++)
{
if (end_idx[i] > start_idx[i])
{
size_t len = end_idx[i] - start_idx[i];
*dst[i] = (char *)realloc(*dst[i], len + 1);
if (*dst[i] == NULL)
@@ -545,14 +421,14 @@ int URIParser::parse(const char *str, ParsedURI& uri)
return -1;
}
memcpy(*dst[i], str + st[i], len);
(*dst[i])[len] = '\0';
if (i == 2 && len >= 3 && (*dst[2])[0] == '%' && (*dst[2])[1] == '2' && ((*dst[2])[2] == 'F' || (*dst[2])[2] == 'f'))
if (i == URI_HOST && str[start_idx[i]] == '[')
{
len = StringUtil::url_decode(*dst[2], len);
(*dst[i])[len] = '\0';
len -= 2;
memcpy(*dst[i], str + start_idx[i] + 1, len);
}
else
memcpy(*dst[i], str + start_idx[i], len);
(*dst[i])[len] = '\0';
}
else
{

View File

@@ -14,6 +14,7 @@
limitations under the License.
Authors: Wu Jiaxu (wujiaxu@sogou-inc.com)
Wang Zhulei (wangzhulei@sogou-inc.com)
*/
#ifndef _URIPARSER_H_
@@ -52,20 +53,80 @@ public:
virtual ~ParsedURI() { deinit(); }
//copy constructor
ParsedURI(const ParsedURI& copy);
// All duplication work is delegated to __copy().
ParsedURI(const ParsedURI& copy)
{
	__copy(copy);
}
//copy operator
ParsedURI& operator= (const ParsedURI& copy);
// Releases the current contents with deinit() and then duplicates `copy`
// through __copy(). Self-assignment is a no-op.
ParsedURI& operator= (const ParsedURI& copy)
{
	if (this == &copy)
		return *this;

	deinit();
	__copy(copy);
	return *this;
}
//move constructor
ParsedURI(ParsedURI&& move);
// Move-constructs by taking over the component strings and parse status of
// `move`; `move` is then reset with init() so it no longer refers to them.
// Only pointers and integers are copied, so the operation cannot throw and
// is marked noexcept (lets standard containers move instead of copy).
ParsedURI(ParsedURI&& move) noexcept
{
	scheme = move.scheme;
	userinfo = move.userinfo;
	host = move.host;
	port = move.port;
	path = move.path;
	query = move.query;
	fragment = move.fragment;
	state = move.state;
	error = move.error;
	move.init();
}
//move operator
ParsedURI& operator= (ParsedURI&& move);
// Move-assigns: releases this object's strings with deinit(), takes over the
// strings and parse status of `move`, then resets `move` with init().
// Self-assignment is a no-op. Nothing here can throw (pointer/integer copies
// plus free()), so the operator is marked noexcept.
ParsedURI& operator= (ParsedURI&& move) noexcept
{
	if (this != &move)
	{
		deinit();
		scheme = move.scheme;
		userinfo = move.userinfo;
		host = move.host;
		port = move.port;
		path = move.path;
		query = move.query;
		fragment = move.fragment;
		state = move.state;
		error = move.error;
		move.init();
	}

	return *this;
}
private:
void init();
void deinit();
// Puts the object into its empty state: no component strings, state
// URI_STATE_INIT and error 0. Does not free anything.
void init()
{
	scheme = userinfo = host = port = path = query = fragment = NULL;
	state = URI_STATE_INIT;
	error = 0;
}
// Frees every component string. free(NULL) is a no-op, so absent components
// need no check. Each pointer is reset to NULL afterwards so that a repeated
// deinit(), or any later code that looks at the members, never sees a
// dangling pointer. state and error are left untouched.
void deinit()
{
	free(scheme);
	scheme = NULL;
	free(userinfo);
	userinfo = NULL;
	free(host);
	host = NULL;
	free(port);
	port = NULL;
	free(path);
	path = NULL;
	free(query);
	query = NULL;
	free(fragment);
	fragment = NULL;
}
void __copy(const ParsedURI& copy);
void __move(ParsedURI&& move);
};
// static class
@@ -74,7 +135,10 @@ class URIParser
public:
// returns 0 on success, -1 on failure
static int parse(const char *str, ParsedURI& uri);
static int parse(const std::string& str, ParsedURI& uri);
// Convenience overload: forwards the string's NUL-terminated buffer to the
// const char * overload and returns its result unchanged.
static int parse(const std::string& str, ParsedURI& uri)
{
	const char *raw = str.c_str();

	return parse(raw, uri);
}
static std::map<std::string, std::vector<std::string>>
split_query_strict(const std::string &query);
@@ -85,57 +149,5 @@ public:
static std::vector<std::string> split_path(const std::string &path);
};
////////////////////
// Puts the object into its empty state: no component strings, state
// URI_STATE_INIT and error 0. Does not free anything.
inline void ParsedURI::init()
{
	scheme = userinfo = host = port = path = query = fragment = NULL;
	state = URI_STATE_INIT;
	error = 0;
}
// Copy constructor: all duplication work is delegated to __copy().
inline ParsedURI::ParsedURI(const ParsedURI& copy)
{
	this->__copy(copy);
}
// Move constructor: hands the source over to __move().
inline ParsedURI::ParsedURI(ParsedURI&& move)
{
	this->__move(std::move(move));
}
// Copy assignment: releases the current contents with deinit() and then
// duplicates `copy` through __copy(). Self-assignment is a no-op.
inline ParsedURI& ParsedURI::operator= (const ParsedURI& copy)
{
	if (this == &copy)
		return *this;

	deinit();
	__copy(copy);
	return *this;
}
// Move assignment: releases the current contents with deinit() and then
// takes over the source through __move(). Self-assignment is a no-op.
inline ParsedURI& ParsedURI::operator= (ParsedURI&& move)
{
	if (this == &move)
		return *this;

	deinit();
	__move(std::move(move));
	return *this;
}
// Convenience overload: forwards the string's NUL-terminated buffer to the
// const char * overload and returns its result unchanged.
inline int URIParser::parse(const std::string& str, ParsedURI& uri)
{
	const char *raw = str.c_str();

	return parse(raw, uri);
}
#endif

View File

@@ -40,6 +40,7 @@ set(TEST_LIST
upstream_unittest
dns_unittest
resource_unittest
uriparser_unittest
)
if (APPLE)
@@ -53,6 +54,7 @@ foreach(src ${TEST_LIST})
target_link_libraries(${src} ${WORKFLOW_LIB} GTest::GTest GTest::Main)
add_test(${src} ${src})
add_dependencies(check ${src})
set_property(SOURCE ${src} APPEND PROPERTY COMPILE_OPTIONS "-fno-rtti")
endforeach()
foreach(src ${TEST_LIST})

197
test/uriparser_unittest.cc Normal file
View File

@@ -0,0 +1,197 @@
/*
Copyright (c) 2021 Sogou, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Author: Wang Zhulei (wangzhulei@sogou-inc.com)
*/
#include <gtest/gtest.h>
#include "workflow/URIParser.h"
// Table-driven check of URIParser::parse(const char *, ParsedURI&).
//
// Each row holds an input URI and the expected value of every ParsedURI
// component; nullptr means the component must be absent (NULL) after the
// parse. Components are compared with EXPECT_STREQ, which treats NULL as a
// regular value, so an unexpectedly missing component is reported as a test
// failure instead of crashing inside strcmp().
//
// A single ParsedURI is deliberately reused for all rows so the test also
// verifies that a later parse clears components set by an earlier one.
TEST(uriparser_unittest, parse)
{
	struct URICase
	{
		const char *input;
		const char *scheme;
		const char *userinfo;
		const char *host;
		const char *port;
		const char *path;
		const char *query;
		const char *fragment;
	};

	static const URICase cases[] = {
		{ "https://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top",
		  "https", "john.doe", "www.example.com", "123",
		  "/forum/questions/", "tag=networking&order=newest", "top" },
		{ "ldap://[2001:db8::7]/c=GB?objectClass?one",
		  "ldap", nullptr, "2001:db8::7", nullptr,
		  "/c=GB", "objectClass?one", nullptr },
		{ "ldap://user@[2001:db8::7]/c=GB?objectClass?one",
		  "ldap", "user", "2001:db8::7", nullptr,
		  "/c=GB", "objectClass?one", nullptr },
		{ "ldap://user@[2001:db8::7]:12345/c=GB?objectClass?one",
		  "ldap", "user", "2001:db8::7", "12345",
		  "/c=GB", "objectClass?one", nullptr },
		{ "ldap://[2001:db8::7]:12345/c=GB?objectClass?one",
		  "ldap", nullptr, "2001:db8::7", "12345",
		  "/c=GB", "objectClass?one", nullptr },
		{ "mailto:John.Doe@example.com",
		  "mailto", nullptr, nullptr, nullptr,
		  "John.Doe@example.com", nullptr, nullptr },
		{ "news:comp.infosystems.www.servers.unix",
		  "news", nullptr, nullptr, nullptr,
		  "comp.infosystems.www.servers.unix", nullptr, nullptr },
		{ "tel:+1-816-555-1212",
		  "tel", nullptr, nullptr, nullptr,
		  "+1-816-555-1212", nullptr, nullptr },
		{ "telnet://192.0.2.16:80/",
		  "telnet", nullptr, "192.0.2.16", "80",
		  "/", nullptr, nullptr },
		{ "urn:oasis:names:specification:docbook:dtd:xml:4.1.2",
		  "urn", nullptr, nullptr, nullptr,
		  "oasis:names:specification:docbook:dtd:xml:4.1.2", nullptr, nullptr },
		{ "https://www.example.com:123/forum/questions/?tag=networking&order=newest#top",
		  "https", nullptr, "www.example.com", "123",
		  "/forum/questions/", "tag=networking&order=newest", "top" },
		{ "https://john.doe@www.example.com/forum/questions/?tag=networking&order=newest#top",
		  "https", "john.doe", "www.example.com", nullptr,
		  "/forum/questions/", "tag=networking&order=newest", "top" },
		{ "foo:/index.html",
		  "foo", nullptr, nullptr, nullptr,
		  "/index.html", nullptr, nullptr },
		{ "http://www.test.cn/subject/ttt/index.html?abc-def-jki-lm-rstuvwxyz",
		  "http", nullptr, "www.test.cn", nullptr,
		  "/subject/ttt/index.html", "abc-def-jki-lm-rstuvwxyz", nullptr },
		{ "http://sg.test1.com/zt/zz/#这是中文测试",
		  "http", nullptr, "sg.test1.com", nullptr,
		  "/zt/zz/", nullptr, "这是中文测试" },
		{ "http://www.test2.com?sg_vid=R_3qHh9H471Ry8OtW5J9R10vc_QR6EQqgA6HHLO6666666qe0Co66666666",
		  "http", nullptr, "www.test2.com", nullptr,
		  nullptr, "sg_vid=R_3qHh9H471Ry8OtW5J9R10vc_QR6EQqgA6HHLO6666666qe0Co66666666", nullptr },
		{ "https://sgsares.test3.com/ttts/中文测试_4115.apk",
		  "https", nullptr, "sgsares.test3.com", nullptr,
		  "/ttts/中文测试_4115.apk", nullptr, nullptr },
		{ "http://viptest.test5.com:8484?sg_vid=Rucnk5BKG81RcIVk7XySNhQtBODR6mKXA06PpWA66666663MTAfR6666666",
		  "http", nullptr, "viptest.test5.com", "8484",
		  nullptr, "sg_vid=Rucnk5BKG81RcIVk7XySNhQtBODR6mKXA06PpWA66666663MTAfR6666666", nullptr },
		{ "http://viptest1.test6.com:84/abc#frag",
		  "http", nullptr, "viptest1.test6.com", "84",
		  "/abc", nullptr, "frag" },
	};

	ParsedURI uri;

	for (const URICase& c : cases)
	{
		// Attach the input URI to every failure reported for this row.
		SCOPED_TRACE(c.input);

		EXPECT_EQ(URIParser::parse(c.input, uri), 0);
		EXPECT_STREQ(uri.scheme, c.scheme);
		EXPECT_STREQ(uri.userinfo, c.userinfo);
		EXPECT_STREQ(uri.host, c.host);
		EXPECT_STREQ(uri.port, c.port);
		EXPECT_STREQ(uri.path, c.path);
		EXPECT_STREQ(uri.query, c.query);
		EXPECT_STREQ(uri.fragment, c.fragment);
	}
}