-
Notifications
You must be signed in to change notification settings - Fork 910
S3 URI Parser #3874
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
S3 URI Parser #3874
Changes from 10 commits
18e7f8b
20309ba
996ac35
68a79de
40fd6c6
00afb77
0c93692
1bc7894
e929cb3
d1d323d
301be45
0c716b4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
{ | ||
"category": "Amazon S3", | ||
"contributor": "", | ||
"type": "feature", | ||
"description": "Adding feature for parsing S3 URIs" | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,10 +20,14 @@ | |
import java.net.URI; | ||
import java.net.URL; | ||
import java.util.ArrayList; | ||
import java.util.HashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Optional; | ||
import java.util.function.Consumer; | ||
import java.util.function.Supplier; | ||
import java.util.regex.Matcher; | ||
import java.util.regex.Pattern; | ||
import software.amazon.awssdk.annotations.Immutable; | ||
import software.amazon.awssdk.annotations.SdkInternalApi; | ||
import software.amazon.awssdk.annotations.SdkPublicApi; | ||
|
@@ -62,8 +66,11 @@ | |
import software.amazon.awssdk.services.s3.internal.endpoints.UseGlobalEndpointResolver; | ||
import software.amazon.awssdk.services.s3.model.GetObjectRequest; | ||
import software.amazon.awssdk.services.s3.model.GetUrlRequest; | ||
import software.amazon.awssdk.services.s3.parsing.S3Uri; | ||
import software.amazon.awssdk.utils.AttributeMap; | ||
import software.amazon.awssdk.utils.StringUtils; | ||
import software.amazon.awssdk.utils.Validate; | ||
import software.amazon.awssdk.utils.http.SdkHttpUtils; | ||
|
||
/** | ||
* Utilities for working with Amazon S3 objects. An instance of this class can be created by: | ||
|
@@ -94,7 +101,7 @@ | |
@SdkPublicApi | ||
public final class S3Utilities { | ||
private static final String SERVICE_NAME = "s3"; | ||
|
||
private static final Pattern ENDPOINT_PATTERN = Pattern.compile("^(.+\\.)?s3[.-]([a-z0-9-]+)\\."); | ||
private final Region region; | ||
private final URI endpoint; | ||
private final S3Configuration s3Configuration; | ||
|
@@ -251,6 +258,162 @@ public URL getUrl(GetUrlRequest getUrlRequest) { | |
} | ||
} | ||
|
||
/** | ||
* Returns a parsed {@link S3Uri} with which a user can easily retrieve the bucket, key, region, style, and query | ||
* parameters of the URI. Only path-style and virtual-hosted-style URI parsing is supported, including CLI-style | ||
* URIs, e.g., "s3://bucket/key". AccessPoints and Outposts URI parsing is not supported. If you work with object keys | ||
* and/or query parameters with special characters, they must be URL-encoded, e.g., replace " " with "%20". If you work with | ||
* virtual-hosted-style URIs with bucket names that contain a dot, i.e., ".", the dot must not be URL-encoded. Encoded | ||
* buckets, keys, and query parameters will be returned decoded. | ||
* | ||
* <p> | ||
* For more information on path-style and virtual-hosted-style URIs, see <a href= | ||
* "https://docs.aws.amazon.com/AmazonS3/latest/userguide/access-bucket-intro.html" | ||
* >Methods for accessing a bucket</a>. | ||
* | ||
* @param uri The URI to be parsed | ||
* @return Parsed {@link S3Uri} | ||
* | ||
* <p><b>Example Usage</b> | ||
* <p> | ||
* {@snippet : | ||
* S3Client s3Client = S3Client.create(); | ||
* S3Utilities s3Utilities = s3Client.utilities(); | ||
* String uriString = "https://myBucket.s3.us-west-1.amazonaws.com/doc.txt?versionId=abc123"; | ||
* URI uri = URI.create(uriString); | ||
* S3Uri s3Uri = s3Utilities.parseUri(uri); | ||
* | ||
* String bucket = s3Uri.bucket().orElse(null); // "myBucket" | ||
* String key = s3Uri.key().orElse(null); // "doc.txt" | ||
* Region region = s3Uri.region().orElse(null); // Region.US_WEST_1 | ||
* boolean isPathStyle = s3Uri.isPathStyle(); // false | ||
* String versionId = s3Uri.firstMatchingRawQueryParameter("versionId").orElse(null); // "abc123" | ||
*} | ||
*/ | ||
public S3Uri parseUri(URI uri) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not a blocker, but we may want to support string, which we will encode for them in the future depending on customer ask. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, I tried using preprocessUrlStr() from v1 to encode but it runs into issues in cases like:
Seems to be pretty complicated to fix these... |
||
validateUri(uri); | ||
|
||
if ("s3".equalsIgnoreCase(uri.getScheme())) { | ||
return parseAwsCliStyleUri(uri); | ||
} | ||
|
||
return parseStandardUri(uri); | ||
} | ||
|
||
private S3Uri parseStandardUri(URI uri) { | ||
|
||
if (uri.getHost() == null) { | ||
throw new IllegalArgumentException("Invalid S3 URI: no hostname: " + uri); | ||
} | ||
|
||
Matcher matcher = ENDPOINT_PATTERN.matcher(uri.getHost()); | ||
if (!matcher.find()) { | ||
throw new IllegalArgumentException("Invalid S3 URI: hostname does not appear to be a valid S3 endpoint: " + uri); | ||
} | ||
zoewangg marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
S3Uri.Builder builder = S3Uri.builder().uri(uri); | ||
addRegionIfNeeded(builder, matcher.group(2)); | ||
addQueryParamsIfNeeded(builder); | ||
|
||
String prefix = matcher.group(1); | ||
if (StringUtils.isEmpty(prefix)) { | ||
return parsePathStyleUri(builder); | ||
} | ||
return parseVirtualHostedStyleUri(builder, matcher); | ||
} | ||
|
||
private S3Uri.Builder addRegionIfNeeded(S3Uri.Builder builder, String region) { | ||
if (!"amazonaws".equals(region)) { | ||
return builder.region(Region.of(region)); | ||
} | ||
return builder; | ||
} | ||
|
||
private S3Uri.Builder addQueryParamsIfNeeded(S3Uri.Builder builder) { | ||
if (builder.uri().getQuery() != null) { | ||
return builder.queryParams(SdkHttpUtils.uriParams(builder.uri())); | ||
} | ||
return builder; | ||
} | ||
|
||
private S3Uri parsePathStyleUri(S3Uri.Builder builder) { | ||
String bucket = null; | ||
String key = null; | ||
String path = builder.uri().getPath(); | ||
|
||
if (!StringUtils.isEmpty(path) && !"/".equals(path)) { | ||
int index = path.indexOf('/', 1); | ||
|
||
if (index == -1) { | ||
// No trailing slash, e.g., "https://s3.amazonaws.com/bucket" | ||
bucket = path.substring(1); | ||
} else { | ||
bucket = path.substring(1, index); | ||
if (index != path.length() - 1) { | ||
key = path.substring(index + 1); | ||
} | ||
} | ||
} | ||
return builder.key(key) | ||
.bucket(bucket) | ||
.isPathStyle(true) | ||
.build(); | ||
} | ||
|
||
private S3Uri parseVirtualHostedStyleUri(S3Uri.Builder builder, Matcher matcher) { | ||
String bucket; | ||
String key = null; | ||
String path = builder.uri().getPath(); | ||
String prefix = matcher.group(1); | ||
|
||
bucket = prefix.substring(0, prefix.length() - 1); | ||
if (!StringUtils.isEmpty(path) && !"/".equals(path)) { | ||
key = path.substring(1); | ||
} | ||
|
||
return builder.key(key) | ||
.bucket(bucket) | ||
.build(); | ||
} | ||
|
||
private S3Uri parseAwsCliStyleUri(URI uri) { | ||
String key = null; | ||
String bucket = uri.getAuthority(); | ||
Region region = null; | ||
boolean isPathStyle = false; | ||
Map<String, List<String>> queryParams = new HashMap<>(); | ||
String path = uri.getPath(); | ||
|
||
if (bucket == null) { | ||
throw new IllegalArgumentException("Invalid S3 URI: bucket not included: " + uri); | ||
} | ||
|
||
if (path.length() > 1) { | ||
key = path.substring(1); | ||
} | ||
|
||
return S3Uri.builder() | ||
.uri(uri) | ||
.bucket(bucket) | ||
.key(key) | ||
.region(region) | ||
.isPathStyle(isPathStyle) | ||
.queryParams(queryParams) | ||
.build(); | ||
} | ||
|
||
private void validateUri(URI uri) { | ||
Validate.paramNotNull(uri, "uri"); | ||
|
||
if (uri.toString().contains(".s3-accesspoint")) { | ||
throw new IllegalArgumentException("AccessPoints URI parsing is not supported: " + uri); | ||
} | ||
|
||
if (uri.toString().contains(".s3-outposts")) { | ||
throw new IllegalArgumentException("Outposts URI parsing is not supported: " + uri); | ||
} | ||
} | ||
|
||
private Region resolveRegionForGetUrl(GetUrlRequest getUrlRequest) { | ||
if (getUrlRequest.region() == null && this.region == null) { | ||
throw new IllegalArgumentException("Region should be provided either in GetUrlRequest object or S3Utilities object"); | ||
|
Uh oh!
There was an error while loading. Please reload this page.