Skip to content

Commit 77cec71

Browse files
committed
v4.2.1 Release
1 parent dfb8a5a commit 77cec71

File tree

6 files changed

+222
-6
lines changed

6 files changed

+222
-6
lines changed

python/deeplake/__init__.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ def progress_bar(iterable, *args, **kwargs):
1616
from deeplake.ingestion import from_coco
1717

1818

19-
__version__ = "4.1.16"
19+
__version__ = "4.2.1"
2020

2121
__all__ = [
2222
"__version__",
@@ -50,6 +50,7 @@ def progress_bar(iterable, *args, **kwargs):
5050
"DimensionsMismatch",
5151
"DtypeMismatch",
5252
"EmbeddingSizeMismatch",
53+
"Executor",
5354
"ExpiredTokenError",
5455
"FormatNotSupportedError",
5556
"Future",
@@ -125,6 +126,7 @@ def progress_bar(iterable, *args, **kwargs):
125126
"UnsupportedPythonType",
126127
"UnsupportedSampleCompression",
127128
"Version",
129+
"VersionNotFoundError",
128130
"WriteFailedError",
129131
"WrongChunkCompression",
130132
"WrongSampleCompression",
@@ -146,6 +148,7 @@ def progress_bar(iterable, *args, **kwargs):
146148
"open_async",
147149
"open_read_only",
148150
"open_read_only_async",
151+
"prepare_query",
149152
"query",
150153
"query_async",
151154
"schemas",
@@ -239,7 +242,9 @@ def transfer_with_links(source, dest, links, column_names):
239242
iterable_cols = [col for col in column_names if col not in links]
240243
link_sample_info = {link: source[link]._links_info() for link in links}
241244
dest.set_creds_key(link_sample_info[links[0]]["key"])
242-
pref_ds = source.query(f"SELECT {','.join(iterable_cols)}")
245+
quoted_cols = ['"' + col + '"' for col in iterable_cols]
246+
joined_cols = ",".join(quoted_cols)
247+
pref_ds = source.query(f"SELECT {joined_cols}")
243248
dl = deeplake._deeplake._Prefetcher(pref_ds, raw_columns=set(get_raw_columns(source)))
244249

245250
for counter, batch in enumerate(progress_bar(dl), start=1):
@@ -255,7 +260,7 @@ def transfer_with_links(source, dest, links, column_names):
255260
commit_data(dest)
256261
commit_data(dest, "Final commit of linked data")
257262

258-
source_ds = deeplake.query(f'select * from "{src}"', token=token)
263+
source_ds = deeplake.query(f'SELECT * FROM "{src}"', token=token)
259264
dest_ds = deeplake.like(source_ds, dst, dst_creds, token=token)
260265
commit_data(dest_ds, "Created dataset")
261266

python/deeplake/__init__.pyi

Lines changed: 142 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ __all__ = [
3838
"DimensionsMismatch",
3939
"DtypeMismatch",
4040
"EmbeddingSizeMismatch",
41+
"Executor",
4142
"ExpiredTokenError",
4243
"FormatNotSupportedError",
4344
"Future",
@@ -113,6 +114,7 @@ __all__ = [
113114
"UnsupportedPythonType",
114115
"UnsupportedSampleCompression",
115116
"Version",
117+
"VersionNotFoundError",
116118
"WriteFailedError",
117119
"WrongChunkCompression",
118120
"WrongSampleCompression",
@@ -134,6 +136,7 @@ __all__ = [
134136
"open_async",
135137
"open_read_only",
136138
"open_read_only_async",
139+
"prepare_query",
137140
"query",
138141
"query_async",
139142
"schemas",
@@ -160,6 +163,9 @@ class Future:
160163
__await__() -> typing.Any:
161164
Enables using the Future in async/await syntax.
162165
166+
cancel() -> None:
167+
Cancels the Future if it is still pending.
168+
163169
is_completed() -> bool:
164170
Checks if the Future has resolved without blocking.
165171
<!-- test-context
@@ -274,6 +280,11 @@ class Future:
274280
"""
275281
...
276282

283+
def cancel(self) -> None:
284+
"""
285+
Cancels the Future if it is still pending.
286+
"""
287+
277288
class FutureVoid:
278289
"""
279290
A Future representing a void async operation in ML pipelines.
@@ -291,6 +302,9 @@ class FutureVoid:
291302
is_completed() -> bool:
292303
Checks completion status without blocking.
293304
305+
cancel() -> None:
306+
Cancels the Future if it is still pending.
307+
294308
<!-- test-context
295309
```python
296310
import deeplake
@@ -362,6 +376,11 @@ class FutureVoid:
362376
"""
363377
...
364378

379+
def cancel(self) -> None:
380+
"""
381+
Cancels the Future if it is still pending.
382+
"""
383+
365384
def is_completed(self) -> bool:
366385
"""
367386
Checks if the operation has completed without blocking.
@@ -540,6 +559,38 @@ class Metadata(ReadOnlyMetadata):
540559
"""
541560
...
542561

562+
def prepare_query(query: str, token: str | None = None, creds: dict[str, str] | None = None) -> Executor:
563+
"""
564+
Prepares a TQL query for execution with optional authentication.
565+
566+
Args:
567+
query: TQL query string to execute
568+
token: Optional Activeloop authentication token
569+
creds: Optional dictionary containing credentials used to access the dataset referenced by the query.
570+
571+
Returns:
572+
Executor: An executor object to run the query.
573+
574+
<!-- test-context
575+
```python
576+
import deeplake
577+
ds = deeplake.create("mem://parametriized")
578+
ds.add_column("category", "text")
579+
ds.append({"category": ["active", "inactive", "not sure"]})
580+
ds.commit()
581+
```
582+
-->
583+
584+
Examples:
585+
Running a parametrized batch query:
586+
```python
587+
ex = deeplake.prepare_query('SELECT * FROM "mem://parametriized" WHERE category = ?')
588+
results = ex.run_batch([["active"], ["inactive"]])
589+
assert len(results) == 2
590+
```
591+
"""
592+
...
593+
543594
def query(query: str, token: str | None = None, creds: dict[str, str] | None = None) -> DatasetView:
544595
"""
545596
Executes TQL queries optimized for ML data filtering and search.
@@ -1627,6 +1678,11 @@ class Row:
16271678
or await the FutureVoid object in an asynchronous context.
16281679
"""
16291680

1681+
def to_dict(self) -> dict:
1682+
"""
1683+
Converts the row to a dictionary.
1684+
"""
1685+
16301686
def __str__(self) -> str: ...
16311687
@property
16321688
def row_id(self) -> int:
@@ -1832,6 +1888,11 @@ class RowView:
18321888
or use the Future in an `await` expression.
18331889
"""
18341890

1891+
def to_dict(self) -> dict:
1892+
"""
1893+
Converts the row to a dictionary.
1894+
"""
1895+
18351896
def __str__(self) -> str: ...
18361897
@property
18371898
def row_id(self) -> int:
@@ -1942,6 +2003,35 @@ class DatasetView:
19422003
"""
19432004
...
19442005

2006+
def prepare_query(self, query: str) -> Executor:
2007+
"""
2008+
Prepares a query for execution.
2009+
2010+
Parameters:
2011+
query: The query to prepare
2012+
2013+
Returns:
2014+
Executor: The prepared query
2015+
2016+
<!-- test-context
2017+
```python
2018+
import deeplake
2019+
ds = deeplake.create("tmp://")
2020+
ds.add_column("category", "text")
2021+
ds.append({"category": ["active", "inactive", "not sure"]})
2022+
```
2023+
-->
2024+
2025+
Examples:
2026+
```python
2027+
executor = ds.prepare_query("select * where category == ?")
2028+
results = executor.run_batch([['active'], ['inactive'], ['not sure']])
2029+
for row in results:
2030+
print("Category is: ", row["category"])
2031+
```
2032+
"""
2033+
...
2034+
19452035
def query(self, query: str) -> DatasetView:
19462036
"""
19472037
Executes the given TQL query against the dataset and returns the results as a [deeplake.DatasetView][].
@@ -2675,6 +2765,26 @@ class Dataset(DatasetView):
26752765
"""
26762766
...
26772767

2768+
def refresh(
2769+
self
2770+
) -> None:
2771+
"""
2772+
Refreshes any new info from the dataset.
2773+
2774+
Similar to [deeplake.Dataset.open_read_only][], but more lightweight.
2775+
"""
2776+
...
2777+
2778+
def refresh_async(
2779+
self
2780+
) -> FutureVoid:
2781+
"""
2782+
Asynchronously refreshes any new info from the dataset.
2783+
2784+
Similar to [deeplake.Dataset.open_read_only_async][], but more lightweight.
2785+
"""
2786+
...
2787+
26782788
@property
26792789
def history(self) -> History:
26802790
"""
@@ -2816,6 +2926,26 @@ class ReadOnlyDataset(DatasetView):
28162926
"""
28172927
...
28182928

2929+
def refresh(
2930+
self
2931+
) -> None:
2932+
"""
2933+
Refreshes any new info from the dataset.
2934+
2935+
Similar to [deeplake.Dataset.open_read_only][], but more lightweight.
2936+
"""
2937+
...
2938+
2939+
def refresh_async(
2940+
self
2941+
) -> FutureVoid:
2942+
"""
2943+
Asynchronously refreshes any new info from the dataset.
2944+
2945+
Similar to [deeplake.Dataset.open_read_only_async][], but more lightweight.
2946+
"""
2947+
...
2948+
28192949
def __getstate__(self) -> tuple:
28202950
"""Returns a dict that can be pickled and used to restore this dataset.
28212951
@@ -2830,6 +2960,14 @@ class ReadOnlyDataset(DatasetView):
28302960
state (dict): The pickled state used to restore the dataset.
28312961
"""
28322962

2963+
class Executor:
2964+
def get_query_string(self) -> str:
2965+
...
2966+
def run_single(self) -> DatasetView:
2967+
...
2968+
def run_batch(self, parameters: list = None) -> list:
2969+
...
2970+
28332971
class ExpiredTokenError(Exception):
28342972
pass
28352973

@@ -3132,6 +3270,9 @@ class StorageNetworkConnectionError(Exception):
31323270
class StorageInternalError(Exception):
31333271
pass
31343272

3273+
class VersionNotFoundError(Exception):
3274+
pass
3275+
31353276
class WriteFailedError(Exception):
31363277
pass
31373278

@@ -3703,4 +3844,4 @@ class TelemetryClient:
37033844
Client for logging deeplake messages to telemetry.
37043845
"""
37053846
endpoint: str
3706-
api_key: str
3847+
api_key: str

python/deeplake/storage.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
from ._deeplake.types import *
22

3-
__all__ = ["Reader", "Writer", "ResourceMeta"]
3+
__all__ = ["Reader", "Writer", "ResourceMeta", "concurrency", "set_concurrency"]

python/deeplake/storage.pyi

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import datetime
77
import deeplake._deeplake.core
88
import typing
99

10-
__all__ = ["Reader", "Writer", "ResourceMeta"]
10+
__all__ = ["Reader", "Writer", "ResourceMeta", "concurrency", "set_concurrency"]
1111

1212
class Reader:
1313
def __getstate__(self) -> tuple: ...
@@ -59,3 +59,37 @@ class ResourceMeta:
5959
@property
6060
def size(self) -> int:
6161
...
62+
63+
def concurrency() -> int:
64+
"""
65+
Returns the number of threads used by storage readers and writers.
66+
67+
<!-- test-context
68+
```python
69+
import deeplake
70+
```
71+
-->
72+
73+
Examples:
74+
```python
75+
deeplake.storage.concurrency()
76+
```
77+
"""
78+
...
79+
80+
def set_concurrency(num_threads: int) -> None:
81+
"""
82+
Sets the number of threads used by storage readers and writers.
83+
84+
<!-- test-context
85+
```python
86+
import deeplake
87+
```
88+
-->
89+
90+
Examples:
91+
```python
92+
deeplake.storage.set_concurrency(64)
93+
```
94+
"""
95+

python/deeplake/tql.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from ._deeplake.tql import *
22

33
__all__ = [
4+
"get_max_num_parallel_queries",
45
"register_function",
6+
"set_max_num_parallel_queries",
57
]

0 commit comments

Comments
 (0)