Skip to content

Commit

Permalink
feat: Use upload session urls for chunk upload (#875)
Browse files Browse the repository at this point in the history
Closes: SDK-3836
  • Loading branch information
lukaszsocha2 authored Jun 4, 2024
1 parent 5a7c767 commit c67b03c
Show file tree
Hide file tree
Showing 10 changed files with 303 additions and 98 deletions.
25 changes: 21 additions & 4 deletions boxsdk/object/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,21 @@ def preflight_check(self, size: int, name: Optional[str] = None) -> Optional[str
)

@api_call
def create_upload_session(self, file_size: int, file_name: Optional[str] = None) -> 'UploadSession':
def create_upload_session(
self, file_size: int, file_name: Optional[str] = None, use_upload_session_urls: bool = True
) -> 'UploadSession':
"""
Create a new chunked upload session for uploading a new version of the file.
:param file_size:
The size of the file in bytes that will be uploaded.
:param file_name:
The new name of the file version that will be uploaded.
:param use_upload_session_urls:
The parameter determining which urls to use to perform the chunked upload.
If True, the urls returned by create_upload_session() endpoint response will be used,
unless a custom API.UPLOAD_URL was set in the config.
If False, the base upload url will be used.
:returns:
A :class:`UploadSession` object.
"""
Expand All @@ -68,13 +75,18 @@ def create_upload_session(self, file_size: int, file_name: Optional[str] = None)
body_params['file_name'] = file_name
url = self.get_url('upload_sessions').replace(self.session.api_config.BASE_API_URL, self.session.api_config.UPLOAD_URL)
response = self._session.post(url, data=json.dumps(body_params)).json()
return self.translator.translate(
upload_session = self.translator.translate(
session=self._session,
response_object=response,
)
# pylint:disable=protected-access
upload_session._use_upload_session_urls = use_upload_session_urls
return upload_session

@api_call
def get_chunked_uploader(self, file_path: str, rename_file: bool = False) -> 'ChunkedUploader':
def get_chunked_uploader(
self, file_path: str, rename_file: bool = False, use_upload_session_urls: bool = True
) -> 'ChunkedUploader':
# pylint: disable=consider-using-with
"""
Instantiate the chunked upload instance and create upload session with path to file.
Expand All @@ -83,13 +95,18 @@ def get_chunked_uploader(self, file_path: str, rename_file: bool = False) -> 'Ch
The local path to the file you wish to upload.
:param rename_file:
Indicates whether the file should be renamed or not.
:param use_upload_session_urls:
The parameter determining which urls to use to perform the chunked upload.
If True, the urls returned by create_upload_session() endpoint response will be used,
unless a custom API.UPLOAD_URL was set in the config.
If False, the base upload url will be used.
:returns:
A :class:`ChunkedUploader` object.
"""
total_size = os.stat(file_path).st_size
content_stream = open(file_path, 'rb')
file_name = os.path.basename(file_path) if rename_file else None
upload_session = self.create_upload_session(total_size, file_name)
upload_session = self.create_upload_session(total_size, file_name, use_upload_session_urls)
return upload_session.get_chunked_uploader_for_stream(content_stream, total_size)

def _get_accelerator_upload_url_for_update(self) -> Optional[str]:
Expand Down
23 changes: 19 additions & 4 deletions boxsdk/object/folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,14 +115,19 @@ def preflight_check(self, size: int, name: str) -> Optional[str]:
)

@api_call
def create_upload_session(self, file_size: int, file_name: str) -> 'UploadSession':
def create_upload_session(self, file_size: int, file_name: str, use_upload_session_urls: bool = True) -> 'UploadSession':
"""
Creates a new chunked upload session for uploading a new file.
:param file_size:
The size of the file in bytes that will be uploaded.
:param file_name:
The name of the file that will be uploaded.
:param use_upload_session_urls:
The parameter determining which urls to use to perform the chunked upload.
If True, the urls returned by create_upload_session() endpoint response will be used,
unless a custom API.UPLOAD_URL was set in the config.
If False, the base upload url will be used.
:returns:
A :class:`UploadSession` object.
"""
Expand All @@ -133,13 +138,18 @@ def create_upload_session(self, file_size: int, file_name: str) -> 'UploadSessio
'file_name': file_name,
}
response = self._session.post(url, data=json.dumps(body_params)).json()
return self.translator.translate(
upload_session = self.translator.translate(
session=self._session,
response_object=response,
)
# pylint:disable=protected-access
upload_session._use_upload_session_urls = use_upload_session_urls
return upload_session

@api_call
def get_chunked_uploader(self, file_path: str, file_name: Optional[str] = None) -> 'ChunkedUploader':
def get_chunked_uploader(
self, file_path: str, file_name: Optional[str] = None, use_upload_session_urls: bool = True
) -> 'ChunkedUploader':
# pylint: disable=consider-using-with
"""
Instantiate the chunked upload instance and create upload session with path to file.
Expand All @@ -149,6 +159,11 @@ def get_chunked_uploader(self, file_path: str, file_name: Optional[str] = None)
:param file_name:
The name with extension of the file that will be uploaded, e.g. new_file_name.zip.
If not specified, the name from the local system is used.
:param use_upload_session_urls:
The parameter determining which urls to use to perform the chunked upload.
If True, the urls returned by create_upload_session() endpoint response will be used,
unless a custom API.UPLOAD_URL was set in the config.
If False, the base upload url will be used.
:returns:
A :class:`ChunkedUploader` object.
"""
Expand All @@ -157,7 +172,7 @@ def get_chunked_uploader(self, file_path: str, file_name: Optional[str] = None)
content_stream = open(file_path, 'rb')

try:
upload_session = self.create_upload_session(total_size, upload_file_name)
upload_session = self.create_upload_session(total_size, upload_file_name, use_upload_session_urls)
return upload_session.get_chunked_uploader_for_stream(content_stream, total_size)
except Exception:
content_stream.close()
Expand Down
28 changes: 23 additions & 5 deletions boxsdk/object/upload_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from boxsdk import BoxAPIException
from boxsdk.util.api_call_decorator import api_call
from boxsdk.util.chunked_uploader import ChunkedUploader
from boxsdk.session.session import Session
from boxsdk.config import API
from .base_object import BaseObject
from ..pagination.limit_offset_based_dict_collection import LimitOffsetBasedDictCollection

Expand All @@ -19,11 +21,22 @@
class UploadSession(BaseObject):
_item_type = 'upload_session'
_parent_item_type = 'file'
_default_upload_url = API.UPLOAD_URL

def get_url(self, *args: Any) -> str:
def __init__(
self, session: Session, object_id: str, response_object: dict = None, use_upload_session_urls: bool = True
):
"""
Initialize the upload session and remember which urls it should use for chunked upload requests.
:param session:
The session forwarded to the base class constructor.
:param object_id:
The id of the upload session, forwarded to the base class constructor.
:param response_object:
The API response describing the upload session, forwarded to the base class constructor.
:param use_upload_session_urls:
Stored on the instance as `_use_upload_session_urls` and consulted by `get_url()`.
If True, `get_url()` may return a url from the object's `session_endpoints`, provided the requested
url_key is present there and the configured UPLOAD_URL still equals the default one.
If False, the url is always built from the session's base url.
"""
super().__init__(session, object_id, response_object)
# Kept as a private flag; get_url() reads it on every call to choose between endpoint and base urls.
self._use_upload_session_urls = use_upload_session_urls

def get_url(self, *args: Any, url_key: str = None) -> str:
"""
Base class override. Endpoint is a little different - it's /files/upload_sessions.
"""
session_endpoints = getattr(self, 'session_endpoints', {})
if self._use_upload_session_urls and url_key in session_endpoints and self.session.api_config.UPLOAD_URL == self._default_upload_url:
return session_endpoints[url_key]

return self._session.get_url(
f'{self._parent_item_type}s/{self._item_type}s',
self._object_id,
Expand All @@ -44,7 +57,7 @@ def get_parts(self, limit: Optional[int] = None, offset: Optional[int] = None) -
"""
return LimitOffsetBasedDictCollection(
session=self.session,
url=self.get_url('parts'),
url=self.get_url('parts', url_key='list_parts'),
limit=limit,
offset=offset,
fields=None,
Expand Down Expand Up @@ -87,7 +100,7 @@ def upload_part_bytes(
'Content-Range': f'bytes {offset}-{range_end}/{total_size}',
}
response = self._session.put(
self.get_url(),
self.get_url(url_key='upload_part'),
headers=headers,
data=part_bytes,
)
Expand Down Expand Up @@ -131,7 +144,7 @@ def commit(

try:
response = self._session.post(
self.get_url('commit'),
self.get_url('commit', url_key='commit'),
headers=headers,
data=json.dumps(body),
)
Expand All @@ -154,7 +167,12 @@ def abort(self) -> bool:
:returns:
A boolean indication success of the upload abort.
"""
return self.delete()

box_response = self._session.delete(
self.get_url(url_key='abort'),
expect_json_response=False
)
return box_response.ok

def get_chunked_uploader_for_stream(self, content_stream: IO[bytes], file_size: int) -> ChunkedUploader:
"""
Expand Down
2 changes: 2 additions & 0 deletions docs/usage/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ API.OAUTH2_AUTHORIZE_URL = 'https://my-company.com/authorize'

### Upload URL
The default URL used when uploading files to Box can be changed by assigning a new value to the `API.UPLOAD_URL` field.
If this variable is ever changed from its default value, the SDK will always use this URL to upload files to Box,
even if `use_upload_session_urls` is set to `True` while creating an upload session for a chunked upload.

```python
from boxsdk.config import API
Expand Down
50 changes: 26 additions & 24 deletions docs/usage/files.md
Original file line number Diff line number Diff line change
Expand Up @@ -194,9 +194,14 @@ Chunked Upload
--------------

For large files or in cases where the network connection is less reliable,
you may want to upload the file in parts. This allows a single part to fail
you may want to upload the file in parts. This allows a single part to fail
without aborting the entire upload, and failed parts can then be retried.

Since the box-python-sdk 3.11.0 release, the SDK by default uses the upload urls provided in the response
when creating a new upload session. This allows you to always upload your content to the closest Box data center and
can significantly improve upload speed. You can disable this feature and always use the base upload url by
setting the `use_upload_session_urls` flag to `False` when creating an upload session.

### Automatic Uploader

Since box-python-sdk 3.7.0 release, automatic uploader uses multiple threads, which significantly speeds up the upload process.
Expand All @@ -211,9 +216,11 @@ API.CHUNK_UPLOAD_THREADS = 6
#### Upload new file

The SDK provides a method of automatically handling a chunked upload. First get a folder you want to upload the file to.
Then call [`folder.get_chunked_uploader(file_path, rename_file=False)`][get_chunked_uploader_for_file] to retrieve
a [`ChunkedUploader`][chunked_uploader_class] object. Calling the method [`chunked_upload.start()`][start] will
kick off the chunked upload process and return the [File][file_class]
Then call [`folder.get_chunked_uploader(file_path, file_name=None, use_upload_session_urls=True)`][get_chunked_uploader_for_file]
to retrieve a [`ChunkedUploader`][chunked_uploader_class] object. Setting `use_upload_session_urls` to `True` initializes
an uploader that utilizes the urls returned by the `Create Upload Session` endpoint response, unless a custom
`API.UPLOAD_URL` was set in the config. Setting `use_upload_session_urls` to `False` initializes an uploader that always uses the base upload urls.
Calling the method [`chunked_upload.start()`][start] will kick off the chunked upload process and return the [File][file_class]
object that was uploaded.

<!-- samples x_chunked_uploads automatic -->
Expand All @@ -224,7 +231,10 @@ uploaded_file = chunked_uploader.start()
print(f'File "{uploaded_file.name}" uploaded to Box with file ID {uploaded_file.id}')
```

You can also upload file stream by creating a [`UploadSession`][upload_session_class] first and then calling the
You can also upload a file stream by creating an [`UploadSession`][upload_session_class] first. This can be done by calling the
[`folder.create_upload_session(file_size, file_name, use_upload_session_urls=True)`][create_upload_session] method.
The `use_upload_session_urls` flag determines whether the upload session uses the urls returned by
the `Create Upload Session` endpoint or always uses the base upload urls. Then you can call the
method [`upload_session.get_chunked_uploader_for_stream(content_stream, file_size)`][get_chunked_uploader_for_stream].

```python
Expand All @@ -240,14 +250,14 @@ with open(test_file_path, 'rb') as content_stream:
#### Upload new file version

To upload a new file version for a large file, first get a file you want to replace.
Then call [`file.get_chunked_uploader(file_path)`][get_chunked_uploader_for_version]
Then call [`file.get_chunked_uploader(file_path, rename_file=False, use_upload_session_urls=True)`][get_chunked_uploader_for_version]
to retrieve a [`ChunkedUploader`][chunked_uploader_class] object. Calling the method [`chunked_upload.start()`][start]
will kick off the chunked upload process and return the updated [File][file_class].

<!-- samples x_chunked_uploads automatic_new_version -->
```python
# uploads new large file version
chunked_uploader = client.file('existing_big_file_id').get_chunked_uploader('/path/to/file')
chunked_uploader = client.file('existing_big_file_id').get_chunked_uploader(file_path='/path/to/file')
uploaded_file = chunked_uploader.start()
print(f'File "{uploaded_file.name}" uploaded to Box with file ID {uploaded_file.id}')
# the uploaded_file.id will be the same as 'existing_big_file_id'
Expand Down Expand Up @@ -293,17 +303,6 @@ except:
print(f'File "{uploaded_file.name}" uploaded to Box with file ID {uploaded_file.id}')
```

Alternatively, you can also create a [`UploadSession`][upload_session_class] object by calling
[`client.upload_session(session_id)`][upload_session] if you have the upload session id. This can be helpful in
resuming an existing upload session.


```python
chunked_uploader = client.upload_session('12345').get_chunked_uploader('/path/to/file')
uploaded_file = chunked_uploader.resume()
print(f'File "{uploaded_file.name}" uploaded to Box with file ID {uploaded_file.id}')
```

[resume]: https://box-python-sdk.readthedocs.io/en/latest/boxsdk.object.html#boxsdk.object.chunked_uploader.ChunkedUploader.resume

#### Abort Chunked Upload
Expand All @@ -317,7 +316,7 @@ from boxsdk.exception import BoxNetworkException
test_file_path = '/path/to/large_file.mp4'
content_stream = open(test_file_path, 'rb')
total_size = os.stat(test_file_path).st_size
chunked_uploader = client.upload_session('56781').get_chunked_uploader_for_stream(content_stream, total_size)
chunked_uploader = client.file('existing_big_file_id').get_chunked_uploader(file_path='/path/to/file')
try:
uploaded_file = chunked_uploader.start()
except BoxNetworkException:
Expand Down Expand Up @@ -371,8 +370,10 @@ The individual endpoint methods are detailed below:
#### Create Upload Session for File Version

To create an upload session for uploading a large version, call
[`file.create_upload_session(file_size, file_name=None)`][create_version_upload_session] with the size of the file to be
uploaded. You can optionally specify a new `file_name` to rename the file on upload. This method returns an
[`file.create_upload_session(file_size, file_name=None, use_upload_session_urls=True)`][create_version_upload_session]
with the size of the file to be uploaded. You can optionally specify a new `file_name` to rename the file on upload.
The `use_upload_session_urls` flag determines whether the upload session uses the urls returned by
the `Create Upload Session` endpoint or always uses the base upload urls. This method returns an
[`UploadSession`][upload_session_class] object representing the created upload session.

<!-- sample post_files_id_upload_sessions -->
Expand All @@ -388,9 +389,10 @@ print(f'Created upload session {upload_session.id} with chunk size of {upload_se
#### Create Upload Session for File

To create an upload session for uploading a new large file, call
[`folder.create_upload_session(file_size, file_name)`][create_upload_session] with the size and filename of the file
to be uploaded. This method returns an [`UploadSession`][upload_session_class] object representing the created upload
session.
[`folder.create_upload_session(file_size, file_name, use_upload_session_urls=True)`][create_upload_session] with
the size and filename of the file to be uploaded. The `use_upload_session_urls` flag determines whether the upload
session uses the urls returned by the `Create Upload Session` endpoint or always uses the base upload urls.
This method returns an [`UploadSession`][upload_session_class] object representing the created upload session.

<!-- sample post_files_upload_sessions -->
```python
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@


CLASSIFIERS = [
'Development Status :: 5 - Production/Stable',
'Development Status :: 6 - Mature',
'Intended Audience :: Developers',
'License :: OSI Approved :: Apache Software License',
'Programming Language :: Python',
Expand All @@ -18,6 +18,7 @@
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
'Programming Language :: Python :: 3.11',
'Programming Language :: Python :: 3.12',
'Programming Language :: Python :: Implementation :: CPython',
'Programming Language :: Python :: Implementation :: PyPy',
'Operating System :: OS Independent',
Expand Down
19 changes: 17 additions & 2 deletions test/integration_new/object/folder_itest.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,24 @@ def test_manual_chunked_upload(parent_folder, large_file, large_file_name):
util.permanently_delete(uploaded_file)


def test_auto_chunked_upload(parent_folder, large_file, large_file_name):
def test_auto_chunked_upload_using_upload_session_urls(parent_folder, large_file, large_file_name):
total_size = os.stat(large_file.path).st_size
chunked_uploader = parent_folder.get_chunked_uploader(large_file.path)
chunked_uploader = parent_folder.get_chunked_uploader(large_file.path, use_upload_session_urls=True)

uploaded_file = chunked_uploader.start()

try:
assert uploaded_file.id
assert uploaded_file.name == large_file_name
assert uploaded_file.parent == parent_folder
assert uploaded_file.size == total_size
finally:
util.permanently_delete(uploaded_file)


def test_auto_chunked_upload_NOT_using_upload_session_urls(parent_folder, large_file, large_file_name):
total_size = os.stat(large_file.path).st_size
chunked_uploader = parent_folder.get_chunked_uploader(large_file.path, use_upload_session_urls=False)

uploaded_file = chunked_uploader.start()

Expand Down
Loading

0 comments on commit c67b03c

Please sign in to comment.