-
Notifications
You must be signed in to change notification settings - Fork 80
feature/deserializing-header #334
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e9775ad
e11648f
d79c587
321816a
2948249
7ae6a2c
9d62f32
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -62,3 +62,28 @@ else | |
else | ||
console.error('FAILURE: test 3 - readString does not match'); | ||
} | ||
// Check that at least one field descriptor was deserialized and report each one.
// TODO: once a real, known RNTuple is deserialized above, also check the specific
// name and type of every field instead of only verifying that they exist.
if (!rntuple.builder?.fieldDescriptors?.length)
   console.error('FAILURE: No fields deserialized');
else {
   const fields = rntuple.builder.fieldDescriptors;
   console.log(`OK: ${fields.length} field(s) deserialized`);
   for (const [i, field] of fields.entries()) {
      if (field.fieldName && field.typeName)
         console.log(`OK: Field ${i}: ${field.fieldName} (${field.typeName})`);
      else
         console.error(`FAILURE: Field ${i} is missing name or type`);
   }
}

// Same check for column descriptors: every column must carry a numeric fieldId.
if (!rntuple.builder?.columnDescriptors?.length)
   console.error('FAILURE: No columns deserialized');
else {
   const columns = rntuple.builder.columnDescriptors;
   console.log(`OK: ${columns.length} column(s) deserialized`);
   for (const [i, col] of columns.entries()) {
      if (typeof col.fieldId === 'number')
         console.log(`OK: Column ${i}: fieldId = ${col.fieldId}, type = ${col.coltype}`);
      else
         console.error(`FAILURE: Column ${i} is missing a valid fieldId`);
   }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -70,6 +70,13 @@ | |
return val; | ||
} | ||
|
||
// Read 64-bit float (8 BYTES) | ||
readF64() { | ||
const val = this.view.getFloat64(this.offset, LITTLE_ENDIAN); | ||
this.offset += 8; | ||
return val; | ||
} | ||
|
||
// Read a string with 32-bit length prefix | ||
readString() { | ||
const length = this.readU32(); | ||
|
@@ -92,54 +99,247 @@ | |
this.offset += 8; | ||
return val; | ||
} | ||
|
||
} | ||
|
||
|
||
/**
 * Deserializes RNTuple header and footer envelopes.
 *
 * Every value that is read is stored as a plain property on the builder
 * instance (for example `name`, `fieldDescriptors`, `columnDescriptors`,
 * `aliasColumns`, `extraTypeInfo`, `clusterGroups`).
 */
class RNTupleDescriptorBuilder {

   /**
    * Deserialize the header envelope.
    *
    * Reads, in order: envelope metadata, feature flags, the name / description /
    * writer strings, and the field, column, alias column and extra type
    * information list frames.
    *
    * @param {*} header_blob - header data handed to RBufferReader; when falsy the call is a no-op
    * @throws {Error} when a feature flag is non-zero or a list frame is not marked as a list
    */
   deserializeHeader(header_blob) {
      if (!header_blob) return;

      const reader = new RBufferReader(header_blob);
      // Read the envelope metadata
      this._readEnvelopeMetadata(reader);

      // TODO: Validate the envelope checksum at the end of deserialization
      // const payloadStart = reader.offset;

      // Read feature flags list (may span multiple 64-bit words)
      this._readFeatureFlags(reader);

      // Read metadata strings
      this.name = reader.readString();
      this.description = reader.readString();
      this.writer = reader.readString();
      // TODO: Remove debug logs before finalizing
      console.log('Name:', this.name);
      console.log('Description:', this.description);
      console.log('Writer:', this.writer);

      // List frame: list of field record frames
      this._readFieldDescriptors(reader);
      // List frame: list of column record frames
      this._readColumnDescriptors(reader);
      // Read alias column descriptors
      this._readAliasColumn(reader);
      // Read Extra Type Information
      this._readExtraTypeInformation(reader);
   }

   /**
    * Deserialize the footer envelope.
    *
    * Reads, in order: envelope metadata, a 32-bit feature flag word, the 64-bit
    * header checksum, the four schema extension list frames and the cluster
    * group list frame.
    *
    * NOTE(review): the schema extension lists are read with the same helpers as
    * the header, so they replace `fieldDescriptors`, `columnDescriptors`,
    * `aliasColumns` and `extraTypeInfo` stored by an earlier deserializeHeader
    * call - confirm whether they should be merged instead.
    *
    * @param {*} footer_blob - footer data handed to RBufferReader; when falsy the call is a no-op
    * @throws {Error} when a list frame is not marked as a list
    */
   deserializeFooter(footer_blob) {
      if (!footer_blob) return;

      const reader = new RBufferReader(footer_blob);
      // Read the envelope metadata
      this._readEnvelopeMetadata(reader);

      // Feature flag (32 bits)
      this.footerFeatureFlags = reader.readU32();
      // Header checksum (64-bit xxhash3)
      this.headerChecksum = reader.readU64();

      // Schema extension record frame (4 list frames inside).
      // These must be the list readers this class actually defines; calling a
      // method name that does not exist would throw a TypeError for every footer.
      this._readFieldDescriptors(reader);
      this._readColumnDescriptors(reader);
      this._readAliasColumn(reader);
      this._readExtraTypeInformation(reader);

      // Cluster Group record frame
      this._readClusterGroups(reader);
   }

   /**
    * Read the 64-bit envelope preamble and split it into `envelopeType`
    * (lowest 16 bits) and `envelopeLength` (the following 48 bits).
    *
    * @param {object} reader - reader positioned at the start of the envelope; readU64 must return a BigInt
    */
   _readEnvelopeMetadata(reader) {
      const typeAndLength = reader.readU64();

      // Envelope metadata
      // The 16 bits are the envelope type ID, and the 48 bits are the envelope length
      this.envelopeType = Number(typeAndLength & 0xFFFFn);
      this.envelopeLength = Number((typeAndLength >> 16n) & 0xFFFFFFFFFFFFn);

      console.log('Envelope Type ID:', this.envelopeType);
      console.log('Envelope Length:', this.envelopeLength);
   }

   /**
    * Read the list of 64-bit feature flag words into `featureFlags`.
    * A word with its most significant bit set announces that another word follows.
    *
    * @param {object} reader - reader positioned at the first feature flag word
    * @throws {Error} when any flag word is non-zero
    */
   _readFeatureFlags(reader) {
      this.featureFlags = [];
      while (true) {
         const val = reader.readU64();
         this.featureFlags.push(val);
         if ((val & 0x8000000000000000n) === 0n) break; // MSB not set: end of list
      }

      // verify all feature flags are zero
      if (this.featureFlags.some(v => v !== 0n))
         throw new Error(`Unexpected non-zero feature flags: ${this.featureFlags}`);
   }

   /**
    * Read the field list frame into `fieldDescriptors`.
    *
    * Optional members are `null` unless the matching bit of the record's
    * `flags` is set: 0x1 arraySize, 0x2 sourceFieldId, 0x4 checksum.
    *
    * @param {object} reader - reader positioned at the field list frame
    * @throws {Error} when the frame size is not negative, i.e. the frame is not a list
    */
   _readFieldDescriptors(reader) {
      const fieldListSize = reader.readS64(); // signed 64-bit
      const fieldListIsList = fieldListSize < 0;

      if (!fieldListIsList)
         throw new Error('Field list frame is not a list frame, which is required.');

      const fieldListCount = reader.readU32(); // number of field entries
      console.log('Field List Count:', fieldListCount);

      // List frame: list of field record frames
      const fieldDescriptors = [];
      for (let i = 0; i < fieldListCount; ++i) {
         const fieldVersion = reader.readU32();
         const typeVersion = reader.readU32();
         const parentFieldId = reader.readU32();
         const structRole = reader.readU16();
         const flags = reader.readU16();

         const fieldName = reader.readString();
         const typeName = reader.readString();
         const typeAlias = reader.readString();
         const description = reader.readString();

         let arraySize = null;
         let sourceFieldId = null;
         let checksum = null;

         if (flags & 0x1) arraySize = reader.readU32();
         if (flags & 0x2) sourceFieldId = reader.readU32();
         if (flags & 0x4) checksum = reader.readU32();

         fieldDescriptors.push({
            fieldVersion,
            typeVersion,
            parentFieldId,
            structRole,
            flags,
            fieldName,
            typeName,
            typeAlias,
            description,
            arraySize,
            sourceFieldId,
            checksum
         });
         console.log(`Field ${i + 1}:`, fieldName, '&&', typeName);
      }

      this.fieldDescriptors = fieldDescriptors;
   }

   /**
    * Read the column list frame into `columnDescriptors`; the raw frame size is
    * kept in `columnListSize`.
    *
    * Optional members are `null` unless the matching bit of the record's
    * `flags` is set: 0x1 firstElementIndex, 0x2 minValue and maxValue.
    * Each descriptor also gets `isDeferred()` and `isSuppressed()` helpers.
    *
    * @param {object} reader - reader positioned at the column list frame
    * @throws {Error} when the frame size is not negative, i.e. the frame is not a list
    */
   _readColumnDescriptors(reader) {
      this.columnListSize = reader.readS64(); // signed 64-bit
      const columnListIsList = this.columnListSize < 0;
      if (!columnListIsList)
         throw new Error('Column list frame is not a list frame, which is required.');

      const columnListCount = reader.readU32(); // number of column entries
      console.log('Column List Count:', columnListCount);

      const columnDescriptors = [];
      for (let i = 0; i < columnListCount; ++i) {
         const coltype = reader.readU16();
         // The misspelled name is the property stored on the descriptor, so it is kept as is.
         const bitsOnStrorage = reader.readU16();
         const fieldId = reader.readU32();
         const flags = reader.readU16();
         const representationIndex = reader.readU16();

         let firstElementIndex = null;
         let minValue = null;
         let maxValue = null;

         if (flags & 0x1) firstElementIndex = reader.readU64();
         if (flags & 0x2) {
            minValue = reader.readF64();
            maxValue = reader.readF64();
         }

         const column = {
            coltype,
            bitsOnStrorage,
            fieldId,
            flags,
            representationIndex,
            firstElementIndex,
            minValue,
            maxValue
         };

         column.isDeferred = function () {
            return (this.flags & 0x01) !== 0;
         };

         // NOTE(review): firstElementIndex comes from readU64; if that value is
         // unsigned this test can never be true - confirm the intended read width/sign.
         column.isSuppressed = function () {
            return this.firstElementIndex !== null && this.firstElementIndex < 0;
         };

         columnDescriptors.push(column);
      }
      this.columnDescriptors = columnDescriptors;
   }

   /**
    * Read the alias column list frame into `aliasColumns`; the raw frame size
    * is kept in `aliasColumnListSize`.
    *
    * @param {object} reader - reader positioned at the alias column list frame
    * @throws {Error} when the frame size is not negative, i.e. the frame is not a list
    */
   _readAliasColumn(reader) {
      this.aliasColumnListSize = reader.readS64(); // signed 64-bit
      const aliasListisList = this.aliasColumnListSize < 0;
      if (!aliasListisList)
         throw new Error('Alias column list frame is not a list frame, which is required.');

      const aliasColumnCount = reader.readU32(); // number of alias column entries
      console.log('Alias Column List Count:', aliasColumnCount);

      const aliasColumns = [];
      for (let i = 0; i < aliasColumnCount; ++i) {
         const physicalColumnId = reader.readU32();
         const fieldId = reader.readU32();
         aliasColumns.push({
            physicalColumnId,
            fieldId
         });
      }
      this.aliasColumns = aliasColumns;
   }

   /**
    * Read the extra type information list frame into `extraTypeInfo`; the raw
    * frame size is kept in `extraTypeInfoListSize`.
    *
    * @param {object} reader - reader positioned at the extra type information list frame
    * @throws {Error} when the frame size is not negative, i.e. the frame is not a list
    */
   _readExtraTypeInformation(reader) {
      this.extraTypeInfoListSize = reader.readS64(); // signed 64-bit
      const isList = this.extraTypeInfoListSize < 0;

      if (!isList)
         throw new Error('Extra type info frame is not a list frame, which is required.');

      const entryCount = reader.readU32(); // number of extra type info entries
      console.log('Extra Type Info Count:', entryCount);

      const extraTypeInfo = [];
      for (let i = 0; i < entryCount; ++i) {
         const contentId = reader.readU32();
         const typeVersion = reader.readU32();
         extraTypeInfo.push({
            contentId,
            typeVersion
         });
      }
      this.extraTypeInfo = extraTypeInfo;
   }

   /**
    * Read the cluster group list frame into `clusterGroups`.
    *
    * @param {object} reader - reader positioned at the cluster group list frame
    * @throws {Error} when the frame size is not negative, i.e. the frame is not a list
    */
   _readClusterGroups(reader) {
      const clusterGroupListSize = reader.readS64();
      const isList = clusterGroupListSize < 0;
      if (!isList)
         throw new Error('Cluster group frame is not a list frame.');

      const clusterGroupCount = reader.readU32();
      console.log('Cluster Group Count:', clusterGroupCount);

      const clusterGroups = [];
      for (let i = 0; i < clusterGroupCount; ++i) {
         const minEntry = reader.readS64();
         const entrySpan = reader.readS64();
         const numClusters = reader.readU32();
         clusterGroups.push({
            minEntry,
            entrySpan,
            numClusters
         });
      }
      this.clusterGroups = clusterGroups;
   }

}
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Where does this `rntuple` variable come from? I don't see any place where it is created, so this test will always fail right now. Let's discuss later today with @linev how to set up the reading of a ROOT file that can give you a proper RNTupleDescriptorBuilder in the test.