File tree 3 files changed +23
-2
lines changed
extension/flat_tensor/serialize
3 files changed +23
-2
lines changed Original file line number Diff line number Diff line change @@ -35,8 +35,8 @@ table TensorMetadata {
35
35
// To retrieve a given tensor:
36
36
// 1. segment_base_offset: from the file header.
37
37
// 2. segment_offset: segments[segment_index].offset
38
- // 3. tensor_offset: segments[segment_offset].tensor_metadata[j].offset
39
- // Find the relevant index j by matching on tensor fqn.
38
+ // 3. tensor_offset: the offset within the segment. If there is only one item
39
+ // in the segment, offset=0.
40
40
offset: uint64;
41
41
}
42
42
@@ -55,6 +55,15 @@ table DataSegment {
55
55
size: uint64;
56
56
}
57
57
58
+ // Attributes a name to data referenced by FlatTensor.segments.
59
+ table NamedData {
60
+ // The unique id of the data blob.
61
+ key: string;
62
+
63
+ // Index of the segment in FlatTensor.segments.
64
+ segment_index: uint32;
65
+ }
66
+
58
67
// FlatTensor is a flatbuffer-based format for storing and loading tensors.
59
68
table FlatTensor {
60
69
// Schema version.
@@ -70,6 +79,10 @@ table FlatTensor {
70
79
// List of data segments that follow the FlatTensor data in this file, sorted by
71
80
// offset. Elements in this schema can refer to these segments by index.
72
81
segments: [DataSegment];
82
+
83
+ // List of blobs keyed by a unique name. Note that multiple 'NamedData'
84
+ // entries could point to the same segment index.
85
+ named_data: [NamedData];
73
86
}
74
87
75
88
root_type FlatTensor;
Original file line number Diff line number Diff line change @@ -31,9 +31,16 @@ class DataSegment:
31
31
size : int
32
32
33
33
34
+ @dataclass
35
+ class NamedData :
36
+ key : str
37
+ segment_index : int
38
+
39
+
34
40
@dataclass
35
41
class FlatTensor :
36
42
version : int
37
43
tensor_alignment : int
38
44
tensors : List [TensorMetadata ]
39
45
segments : List [DataSegment ]
46
+ named_data : List [NamedData ]
Original file line number Diff line number Diff line change @@ -282,6 +282,7 @@ def serialize(
282
282
tensor_alignment = self .config .tensor_alignment ,
283
283
tensors = flat_tensor_metadata ,
284
284
segments = [DataSegment (offset = 0 , size = len (flat_tensor_data ))],
285
+ named_data = [],
285
286
)
286
287
287
288
flatbuffer_payload = _serialize_to_flatbuffer (flat_tensor )
You can’t perform that action at this time.
0 commit comments