Skip to content

Commit cdecd6c

Browse files
jeking3 authored and sbiscigl committed
Add an option to fix non-compliant RFC3986 encoding.
1 parent 3f812fe commit cdecd6c

File tree

6 files changed

+118
-53
lines changed

6 files changed

+118
-53
lines changed

aws-cpp-sdk-core-tests/http/URITest.cpp

Lines changed: 41 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -281,7 +281,20 @@ TEST(URITest, TestParseWithColon)
281281
EXPECT_EQ(80, complexUri.GetPort());
282282
EXPECT_STREQ("/awsnativesdkputobjectstestbucket20150702T200059Z/TestObject:1234/awsnativesdkputobjectstestbucket20150702T200059Z/TestObject:Key", complexUri.GetPath().c_str());
283283
EXPECT_STREQ(strComplexUri, complexUri.GetURIString().c_str());
284+
}
284285

286+
TEST(URITest, TestParseWithColonCompliant)
287+
{
288+
Aws::Http::SetCompliantRfc3986Encoding(true);
289+
const char* strComplexUri = "http://s3.us-east-1.amazonaws.com/awsnativesdkputobjectstestbucket20150702T200059Z/TestObject:1234/awsnativesdkputobjectstestbucket20150702T200059Z/TestObject:Key";
290+
URI complexUri(strComplexUri);
291+
const char* compliantStrComplexUri = "http://s3.us-east-1.amazonaws.com/awsnativesdkputobjectstestbucket20150702T200059Z/TestObject%3A1234/awsnativesdkputobjectstestbucket20150702T200059Z/TestObject%3AKey";
292+
EXPECT_EQ(Scheme::HTTP, complexUri.GetScheme());
293+
EXPECT_STREQ("s3.us-east-1.amazonaws.com", complexUri.GetAuthority().c_str());
294+
EXPECT_EQ(80, complexUri.GetPort());
295+
EXPECT_STREQ("/awsnativesdkputobjectstestbucket20150702T200059Z/TestObject:1234/awsnativesdkputobjectstestbucket20150702T200059Z/TestObject:Key", complexUri.GetPath().c_str());
296+
EXPECT_STREQ(compliantStrComplexUri, complexUri.GetURIString().c_str());
297+
Aws::Http::SetCompliantRfc3986Encoding(false);
285298
}
286299

287300
TEST(URITest, TestGetURLEncodedPath)
@@ -328,3 +341,31 @@ TEST(URITest, TestGetRFC3986URLEncodedPath)
328341
uri = "https://test.com/segment+other/b;jsession=1";
329342
EXPECT_STREQ("/segment%2Bother/b%3Bjsession=1", URI::URLEncodePathRFC3986(uri.GetPath()).c_str());
330343
}
344+
345+
TEST(URITest, TestGetRFC3986URLEncodedPathCompliant)
346+
{
347+
Aws::Http::SetCompliantRfc3986Encoding(true);
348+
349+
URI uri = "https://test.com/path/1234/";
350+
EXPECT_STREQ("/path/1234/", URI::URLEncodePathRFC3986(uri.GetPath()).c_str());
351+
352+
uri = "https://test.com/path/$omething";
353+
EXPECT_STREQ("/path/%24omething", URI::URLEncodePathRFC3986(uri.GetPath()).c_str());
354+
355+
uri = "https://test.com/path/$omethingel$e";
356+
EXPECT_STREQ("/path/%24omethingel%24e", URI::URLEncodePathRFC3986(uri.GetPath()).c_str());
357+
358+
uri = "https://test.com/path/~something.an0ther";
359+
EXPECT_STREQ("/path/~something.an0ther", URI::URLEncodePathRFC3986(uri.GetPath()).c_str());
360+
361+
uri = "https://test.com/path/~something?an0ther";
362+
EXPECT_STREQ("/path/~something", URI::URLEncodePathRFC3986(uri.GetPath()).c_str());
363+
364+
uri = "https://test.com/ሴ";
365+
EXPECT_STREQ("/%E1%88%B4", URI::URLEncodePathRFC3986(uri.GetPath()).c_str());
366+
367+
uri = "https://test.com/segment+other/b;jsession=1";
368+
EXPECT_STREQ("/segment%2Bother/b%3Bjsession%3D1", URI::URLEncodePathRFC3986(uri.GetPath()).c_str());
369+
370+
Aws::Http::SetCompliantRfc3986Encoding(false);
371+
}

aws-cpp-sdk-core/include/aws/core/Aws.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ namespace Aws
8080
*/
8181
struct HttpOptions
8282
{
83-
HttpOptions() : initAndCleanupCurl(true), installSigPipeHandler(false)
83+
HttpOptions() : initAndCleanupCurl(true), installSigPipeHandler(false), compliantRfc3986Encoding(false)
8484
{ }
8585

8686
/**
@@ -100,6 +100,10 @@ namespace Aws
100100
* NOTE: CURLOPT_NOSIGNAL is already being set.
101101
*/
102102
bool installSigPipeHandler;
103+
/**
104+
* When true, disables the legacy URL path encoding that leaves `$&,:@=` unescaped. Defaults to false, which keeps the legacy behavior.
105+
*/
106+
bool compliantRfc3986Encoding;
103107
};
104108

105109
/**

aws-cpp-sdk-core/include/aws/core/http/URI.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ namespace Aws
2121
static const uint16_t HTTP_DEFAULT_PORT = 80;
2222
static const uint16_t HTTPS_DEFAULT_PORT = 443;
2323

24+
extern bool s_compliantRfc3986Encoding;
25+
AWS_CORE_API void SetCompliantRfc3986Encoding(bool compliant);
26+
2427
//per https://tools.ietf.org/html/rfc3986#section-3.4 there is nothing preventing servers from allowing
2528
//multiple values for the same key. So use a multimap instead of a map.
2629
typedef Aws::MultiMap<Aws::String, Aws::String> QueryStringParameterCollection;

aws-cpp-sdk-core/source/Aws.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ namespace Aws
136136

137137
Aws::Http::SetInitCleanupCurlFlag(options.httpOptions.initAndCleanupCurl);
138138
Aws::Http::SetInstallSigPipeHandlerFlag(options.httpOptions.installSigPipeHandler);
139+
Aws::Http::SetCompliantRfc3986Encoding(options.httpOptions.compliantRfc3986Encoding);
139140
Aws::Http::InitHttp();
140141
Aws::InitializeEnumOverflowContainer();
141142
cJSON_AS4CPP_Hooks hooks;

aws-cpp-sdk-core/source/http/URI.cpp

Lines changed: 47 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,48 @@ namespace Http
2424

2525
const char* SEPARATOR = "://";
2626

27+
bool s_compliantRfc3986Encoding = false;
28+
void SetCompliantRfc3986Encoding(bool compliant) { s_compliantRfc3986Encoding = compliant; }
29+
30+
Aws::String urlEncodeSegment(const Aws::String& segment)
31+
{
32+
// consolidates legacy escaping logic into one local method
33+
if (s_compliantRfc3986Encoding)
34+
{
35+
return StringUtils::URLEncode(segment.c_str());
36+
}
37+
else
38+
{
39+
Aws::StringStream ss;
40+
ss << std::hex << std::uppercase;
41+
for(unsigned char c : segment) // alnum results in UB if the value of c is not unsigned char & is not EOF
42+
{
43+
// RFC 3986 §2.3 unreserved characters
44+
if (StringUtils::IsAlnum(c))
45+
{
46+
ss << c;
47+
continue;
48+
}
49+
switch(c)
50+
{
51+
// §2.3 unreserved characters
52+
// The path section of the URL allows unreserved characters to appear unescaped
53+
case '-': case '_': case '.': case '~':
54+
// RFC 3986 §2.2 Reserved characters
55+
// NOTE: this implementation does not accurately implement the RFC on purpose to accommodate for
56+
// discrepancies in the implementations of URL encoding between AWS services for legacy reasons.
57+
case '$': case '&': case ',':
58+
case ':': case '=': case '@':
59+
ss << c;
60+
break;
61+
default:
62+
ss << '%' << std::setfill('0') << std::setw(2) << (int)c << std::setw(0);
63+
}
64+
}
65+
return ss.str();
66+
}
67+
}
68+
2769
} // namespace Http
2870
} // namespace Aws
2971

@@ -101,7 +143,7 @@ void URI::SetScheme(Scheme value)
101143

102144
Aws::String URI::URLEncodePathRFC3986(const Aws::String& path)
103145
{
104-
if(path.empty())
146+
if (path.empty())
105147
{
106148
return path;
107149
}
@@ -113,34 +155,10 @@ Aws::String URI::URLEncodePathRFC3986(const Aws::String& path)
113155
// escape characters appearing in a URL path according to RFC 3986
114156
for (const auto& segment : pathParts)
115157
{
116-
ss << '/';
117-
for(unsigned char c : segment) // alnum results in UB if the value of c is not unsigned char & is not EOF
118-
{
119-
// §2.3 unreserved characters
120-
if (StringUtils::IsAlnum(c))
121-
{
122-
ss << c;
123-
continue;
124-
}
125-
switch(c)
126-
{
127-
// §2.3 unreserved characters
128-
case '-': case '_': case '.': case '~':
129-
// The path section of the URL allow reserved characters to appear unescaped
130-
// RFC 3986 §2.2 Reserved characters
131-
// NOTE: this implementation does not accurately implement the RFC on purpose to accommodate for
132-
// discrepancies in the implementations of URL encoding between AWS services for legacy reasons.
133-
case '$': case '&': case ',':
134-
case ':': case '=': case '@':
135-
ss << c;
136-
break;
137-
default:
138-
ss << '%' << std::setfill('0') << std::setw(2) << (int)((unsigned char)c) << std::setw(0);
139-
}
140-
}
158+
ss << '/' << urlEncodeSegment(segment);
141159
}
142160

143-
//if the last character was also a slash, then add that back here.
161+
// if the last character was also a slash, then add that back here.
144162
if (path.back() == '/')
145163
{
146164
ss << '/';
@@ -216,33 +234,10 @@ Aws::String URI::GetURLEncodedPathRFC3986() const
216234
ss << std::hex << std::uppercase;
217235

218236
// escape characters appearing in a URL path according to RFC 3986
237+
// (mostly; there is some non-standard legacy support that can be disabled)
219238
for (const auto& segment : m_pathSegments)
220239
{
221-
ss << '/';
222-
for(unsigned char c : segment) // alnum results in UB if the value of c is not unsigned char & is not EOF
223-
{
224-
// §2.3 unreserved characters
225-
if (StringUtils::IsAlnum(c))
226-
{
227-
ss << c;
228-
continue;
229-
}
230-
switch(c)
231-
{
232-
// §2.3 unreserved characters
233-
case '-': case '_': case '.': case '~':
234-
// The path section of the URL allow reserved characters to appear unescaped
235-
// RFC 3986 §2.2 Reserved characters
236-
// NOTE: this implementation does not accurately implement the RFC on purpose to accommodate for
237-
// discrepancies in the implementations of URL encoding between AWS services for legacy reasons.
238-
case '$': case '&': case ',':
239-
case ':': case '=': case '@':
240-
ss << c;
241-
break;
242-
default:
243-
ss << '%' << std::setfill('0') << std::setw(2) << (int)((unsigned char)c) << std::setw(0);
244-
}
245-
}
240+
ss << '/' << urlEncodeSegment(segment);
246241
}
247242

248243
if (m_pathSegments.empty() || m_pathHasTrailingSlash)

aws-cpp-sdk-s3-integration-tests/RunTests.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,34 @@
99
#include <aws/testing/TestingEnvironment.h>
1010
#include <aws/testing/MemoryTesting.h>
1111

12+
void ParseArgs(int argc, char** argv, Aws::SDKOptions& options)
13+
{
14+
// std::string rather than Aws::String since this happens before the memory manager is initialized
15+
const std::string resourcePrefixOption = "--rfc3986_compliant=";
16+
// list other options here
17+
for(int i = 1; i < argc; i++)
18+
{
19+
std::string arg = argv[i];
20+
if(arg.find(resourcePrefixOption) == 0)
21+
{
22+
arg = arg.substr(resourcePrefixOption.length()); // get whatever value after the '='
23+
if (arg == "true" || arg == "1")
24+
{
25+
std::cout << "Set RFC3986 compliance mode ON." << std::endl;
26+
options.httpOptions.compliantRfc3986Encoding = true;
27+
}
28+
}
29+
}
30+
}
31+
1232
int main(int argc, char** argv)
1333
{
1434
Aws::SDKOptions options;
1535
options.loggingOptions.logLevel = Aws::Utils::Logging::LogLevel::Trace;
1636
AWS_BEGIN_MEMORY_TEST_EX(options, 1024, 128);
1737
Aws::Testing::InitPlatformTest(options);
1838
Aws::Testing::ParseArgs(argc, argv);
39+
ParseArgs(argc, argv, options);
1940

2041
Aws::InitAPI(options);
2142
::testing::InitGoogleTest(&argc, argv);

0 commit comments

Comments
 (0)