Quick Start
This guide will help you get started with Conflux S3 Utils quickly.
Basic Usage
Initializing the Client
from conflux_s3_utils import S3Client
# Create a client with default boto3 configuration
s3 = S3Client()
# Or provide your own boto3 client
import boto3
custom_client = boto3.client('s3', region_name='us-west-2')
s3 = S3Client(client=custom_client)
Working with S3 URIs
S3Uri provides a type-safe way to work with S3 URIs:
from conflux_s3_utils import S3Uri
# Parse from an S3 URI string
s3uri = S3Uri.from_str("s3://bucket/path/to/object.txt")
# Access bucket and path components
print(s3uri.bucket) # "bucket"
print(s3uri.path) # "path/to/object.txt"
print(s3uri.filename) # "object.txt"
# Convert back to an S3 URI string
print(str(s3uri)) # "s3://bucket/path/to/object.txt"
Path Manipulation
S3Uri supports convenient path manipulation:
# Replace the entire path
s3uri_dir = s3uri.with_path("new/path/directory")
# Join paths using the slash operator
child_s3uri = s3uri_dir / "subfolder" / "file.txt"
# Result: s3://bucket/new/path/directory/subfolder/file.txt
# Join paths using join_path method
child_s3uri = s3uri_dir.join_path("subfolder", "file.txt")
Common Operations
Opening Files
# Open an S3 object directly using fsspec
s3uri = S3Uri.from_str("s3://bucket/data.json")
# Read mode
with s3.open(s3uri, mode="rb") as f:
    data = f.read()
# Write mode
with s3.open(s3uri, mode="wb") as f:
    f.write(b"Hello, S3!")
Working with Local Files
The open_local method is useful when you need a local file path:
# Read mode: Download from S3, work with local file
s3uri = S3Uri.from_str("s3://bucket/data.csv")
with s3.open_local(s3uri, mode="rb") as f:
    # File is downloaded to a temporary location
    data = f.read()
# Temporary file is automatically cleaned up
# Write mode: Work with local file, upload to S3
with s3.open_local(s3uri, mode="wb") as f:
    f.write(b"New data")
# File is automatically uploaded to S3 and cleaned up
Checking Object Existence
s3uri = S3Uri.from_str("s3://bucket/path/to/object.txt")
if s3.object_exists(s3uri):
    print("Object exists!")
Listing Objects
# List objects directly under a path (non-recursive)
s3uri = S3Uri.from_str("s3://bucket/path/to/directory")
for obj_uri in s3.list_objects(s3uri):
    print(obj_uri)
# List all objects recursively
for obj_uri in s3.list_objects(s3uri, recursive=True):
    print(obj_uri)
Uploading and Downloading
from pathlib import Path
# Upload a local file
local_file = Path("./local/data.txt")
s3uri = S3Uri.from_str("s3://bucket/remote/data.txt")
s3.upload_file(local_file, s3uri)
# Download a file
s3.download_file(s3uri, local_file)
# Customize multipart upload/download chunk size (default 8MB)
s3.upload_file(local_file, s3uri, multipart_chunksize=16*1024*1024) # 16MB chunks
Uploading Directories
from pathlib import Path
# Upload an entire directory
local_dir = Path("./local/directory")
s3uri = S3Uri.from_str("s3://bucket/remote/directory")
s3.upload_directory(local_dir, s3uri)
# Upload with custom concurrency (default is 10 concurrent uploads)
s3.upload_directory(local_dir, s3uri, concurrency=5)
# Exclude specific files
exclude = {Path("secret.txt"), Path("cache/temp.dat")}
s3.upload_directory(local_dir, s3uri, exclude=exclude)
Copying and Deleting
# Copy an object within S3
src = S3Uri.from_str("s3://bucket/source/file.txt")
dest = S3Uri.from_str("s3://bucket/destination/file.txt")
s3.copy_object(src, dest)
# Delete an object
s3.delete_object(s3uri)
Complete Example
Here’s a complete example that demonstrates common operations:
from pathlib import Path
from conflux_s3_utils import S3Client, S3Uri
# Initialize client
s3 = S3Client()
# Parse S3 URIs
base_uri = S3Uri.from_str("s3://my-bucket/data")
input_uri = base_uri / "input" / "data.csv"
output_uri = base_uri / "output" / "results.txt"
# Check if input exists
if not s3.object_exists(input_uri):
    raise ValueError(f"Input file not found: {input_uri}")
# Download and process
local_input = Path("./temp/input.csv")
s3.download_file(input_uri, local_input)
# Process the file
with open(local_input, "r") as f:
    processed_data = f.read().upper()
# Upload results
local_output = Path("./temp/output.txt")
with open(local_output, "w") as f:
    f.write(processed_data)
s3.upload_file(local_output, output_uri)
print(f"Results uploaded to: {output_uri}")
Next Steps
Check out the API Reference for detailed API documentation.
See more Examples for advanced usage patterns.