Skip to content

feature/deserializing-header #334

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions demo/node/rntuple.js
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,28 @@ else
else
console.error('FAILURE: test 3 - readString does not match');
}
if (!rntuple.builder?.fieldDescriptors?.length)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where does this `rntuple` variable come from? I don't see any place where it is created, so this test will always fail right now.
Let's discuss later today with @linev how to set up the reading of a ROOT file that can give you a proper RNTupleDescriptorBuilder in the test.

console.error('FAILURE: No fields deserialized');
else {
console.log(`OK: ${rntuple.builder.fieldDescriptors.length} field(s) deserialized`);
for (let i = 0; i < rntuple.builder.fieldDescriptors.length; ++i) {
const field = rntuple.builder.fieldDescriptors[i];
if (!field.fieldName || !field.typeName)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Once we have a real, known RNTuple being deserialized above, this loop should also check the specific name and type of each field, instead of just verifying that they exist.

console.error(`FAILURE: Field ${i} is missing name or type`);
else
console.log(`OK: Field ${i}: ${field.fieldName} (${field.typeName})`);
}
}

// Column descriptors: report how many were deserialized, then verify that each
// entry carries a numeric fieldId.
if (rntuple.builder?.columnDescriptors?.length) {
   console.log(`OK: ${rntuple.builder.columnDescriptors.length} column(s) deserialized`);
   for (const [i, col] of rntuple.builder.columnDescriptors.entries()) {
      const hasFieldId = typeof col.fieldId === 'number';
      if (hasFieldId)
         console.log(`OK: Column ${i}: fieldId = ${col.fieldId}, type = ${col.coltype}`);
      else
         console.error(`FAILURE: Column ${i} is missing a valid fieldId`);
   }
} else
   console.error('FAILURE: No columns deserialized');
5 changes: 0 additions & 5 deletions demo/node/rntuple_test.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,6 @@ if (rntuple.builder?.name !== 'Staff')
else
console.log('OK: name is', rntuple.builder?.name);

if (typeof rntuple.builder?.version !== 'number')
console.error('FAILURE: version is missing or invalid');
else
console.log('OK: version is', rntuple.builder.version);

if (!rntuple.builder?.description)
console.error('FAILURE: description is missing');
else
Expand Down
258 changes: 229 additions & 29 deletions modules/rntuple.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,13 @@
return val;
}

// Read 64-bit float (8 BYTES)
readF64() {
const val = this.view.getFloat64(this.offset, LITTLE_ENDIAN);
this.offset += 8;
return val;
}

// Read a string with 32-bit length prefix
readString() {
const length = this.readU32();
Expand All @@ -92,54 +99,247 @@
this.offset += 8;
return val;
}

}


class RNTupleDescriptorBuilder {

deserializeHeader(header_blob) {
if (!header_blob) return;

const reader = new RBufferReader(header_blob);
deserializeHeader(header_blob) {
if (!header_blob) return;

const reader = new RBufferReader(header_blob);
// Read the envelope metadata
this._readEnvelopeMetadata(reader);

// TODO: Validate the envelope checksum at the end of deserialization
// const payloadStart = reader.offset;

// Read feature flags list (may span multiple 64-bit words)
this._readFeatureFlags(reader);

// Read metadata strings
this.name = reader.readString();
this.description = reader.readString();
this.writer = reader.readString();
// TODO: Remove debug logs before finalizing
console.log('Name:', this.name);
console.log('Description:', this.description);
console.log('Writer:', this.writer);

// List frame: list of field record frames
this._readFieldDescriptors(reader);
// List frame: list of column record frames
this._readColumnDescriptors(reader);
// Read alias column descriptors
this._readAliasColumn(reader);
// Read Extra Type Information
this._readExtraTypeInformation(reader);
}

// 1. Read header version (4 bytes)
this.version = reader.readU32();

// 2. Read feature flags (4 bytes)
this.headerFeatureFlags = reader.readU32();
deserializeFooter(footer_blob) {
if (!footer_blob) return;

// 3. Read xxhash3 (64-bit, 8 bytes)
this.xxhash3 = reader.readU64();
const reader = new RBufferReader(footer_blob);
// Read the envelope metadata
this._readEnvelopeMetadata(reader);

// 4. Read name (length-prefixed string)
this.name = reader.readString();
// Feature flag(32 bits)
this.footerFeatureFlags = reader.readU32();
// Header checksum (64-bit xxhash3)
this.headerChecksum = reader.readU64();

// 5. Read description (length-prefixed string)
this.description = reader.readString();

// Schema extension record frame (4 list frames inside)
this._readFieldDescriptors(reader);
this._readColumnDescriptors(reader);
this._readAliasColumnDescriptors(reader);
this._readExtraTypeInfos(reader);

// Console output to verify deserialization results
console.log('Version:', this.version);
console.log('Header Feature Flags:', this.headerFeatureFlags);
console.log('xxhash3:', '0x' + this.xxhash3.toString(16).padStart(16, '0'));
console.log('Name:', this.name);
console.log('Description:', this.description);
// Cluster Group record frame
this._readClusterGroups(reader);
}


deserializeFooter(footer_blob) {
if (!footer_blob) return;
_readEnvelopeMetadata(reader) {
const typeAndLength = reader.readU64();

const reader = new RBufferReader(footer_blob);
// Envelope metadata
// The 16 bits are the envelope type ID, and the 48 bits are the envelope length
this.envelopeType = Number(typeAndLength & 0xFFFFn);
this.envelopeLength = Number((typeAndLength >> 16n) & 0xFFFFFFFFFFFFn);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

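This can just be Number(typeAndLength >> 16n): assuming readU64 returns a non-negative BigInt, the right shift already leaves only the upper 48 bits, so the extra mask is redundant. (Note that BigInt has no unsigned right shift; `>>>` on a BigInt throws a TypeError, so the plain `>>` must be used.)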


this.footerFeatureFlags = reader.readU32();
this.headerChecksum = reader.readU32();
console.log('Envelope Type ID:', this.envelopeType);
console.log('Envelope Length:', this.envelopeLength);
}
_readFeatureFlags(reader) {
this.featureFlags = [];
while (true) {
const val = reader.readU64();
this.featureFlags.push(val);
if ((val & 0x8000000000000000n) === 0n) break; // MSB not set: end of list
}

// verify all feature flags are zero
if (this.featureFlags.some(v => v !== 0n))
throw new Error('Unexpected non-zero feature flags: ' + this.featureFlags);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please indent this line properly

}

console.log('Footer Feature Flags:', this.footerFeatureFlags);
console.log('Header Checksum:', this.headerChecksum);
_readFieldDescriptors(reader) {
const fieldListSize = reader.readS64(), // signed 64-bit
fieldListIsList = fieldListSize < 0;


if (!fieldListIsList)
throw new Error('Field list frame is not a list frame, which is required.');

const fieldListCount = reader.readU32(); // number of field entries
console.log('Field List Count:', fieldListCount);

// List frame: list of field record frames

const fieldDescriptors = [];
for (let i = 0; i < fieldListCount; ++i) {
const fieldVersion = reader.readU32(),
typeVersion = reader.readU32(),
parentFieldId = reader.readU32(),
structRole = reader.readU16(),
flags = reader.readU16(),

fieldName = reader.readString(),
typeName = reader.readString(),
typeAlias = reader.readString(),
description = reader.readString();
let arraySize = null, sourceFieldId = null, checksum = null;

if (flags & 0x1) arraySize = reader.readU32();
if (flags & 0x2) sourceFieldId = reader.readU32();
if (flags & 0x4) checksum = reader.readU32();

fieldDescriptors.push({
fieldVersion,
typeVersion,
parentFieldId,
structRole,
flags,
fieldName,
typeName,
typeAlias,
description,
arraySize,
sourceFieldId,
checksum
});
console.log(`Field ${i + 1}:`, fieldName, '&&', typeName);
}
this.fieldDescriptors = fieldDescriptors;
}
_readColumnDescriptors(reader) {
this.columnListSize = reader.readS64(); // signed 64-bit
const columnListIsList = this.columnListSize < 0;
if (!columnListIsList)
throw new Error('Column list frame is not a list frame, which is required.');
const columnListCount = reader.readU32(); // number of column entries
console.log('Column List Count:', columnListCount);
const columnDescriptors = [];
for (let i = 0; i < columnListCount; ++i) {
const coltype = reader.readU16(),
bitsOnStrorage = reader.readU16(),
fieldId = reader.readU32(),
flags = reader.readU16(),
representationIndex = reader.readU16();

let firstElementIndex = null, minValue = null, maxValue = null;
if (flags & 0x1) firstElementIndex = reader.readU64();
if (flags & 0x2){
minValue = reader.readF64();
maxValue = reader.readF64();
}

const column = {
coltype,
bitsOnStrorage,
fieldId,
flags,
representationIndex,
firstElementIndex,
minValue,
maxValue
};

column.isDeferred = function () {

Check warning on line 270 in modules/rntuple.mjs

View workflow job for this annotation

GitHub Actions / build-windows (18.x)

Unexpected space before function parentheses

Check warning on line 270 in modules/rntuple.mjs

View workflow job for this annotation

GitHub Actions / build-macos (18.x)

Unexpected space before function parentheses

Check warning on line 270 in modules/rntuple.mjs

View workflow job for this annotation

GitHub Actions / build-macos (22.x)

Unexpected space before function parentheses

Check warning on line 270 in modules/rntuple.mjs

View workflow job for this annotation

GitHub Actions / build-windows (20.x)

Unexpected space before function parentheses

Check warning on line 270 in modules/rntuple.mjs

View workflow job for this annotation

GitHub Actions / build-windows (22.x)

Unexpected space before function parentheses

Check warning on line 270 in modules/rntuple.mjs

View workflow job for this annotation

GitHub Actions / build-macos (20.x)

Unexpected space before function parentheses

Check warning on line 270 in modules/rntuple.mjs

View workflow job for this annotation

GitHub Actions / build-ubuntu (22.x, g++-13)

Unexpected space before function parentheses

Check warning on line 270 in modules/rntuple.mjs

View workflow job for this annotation

GitHub Actions / build-ubuntu (18.x, g++-13)

Unexpected space before function parentheses

Check warning on line 270 in modules/rntuple.mjs

View workflow job for this annotation

GitHub Actions / build-ubuntu (18.x, g++-14)

Unexpected space before function parentheses

Check warning on line 270 in modules/rntuple.mjs

View workflow job for this annotation

GitHub Actions / build-ubuntu (22.x, g++-14)

Unexpected space before function parentheses

Check warning on line 270 in modules/rntuple.mjs

View workflow job for this annotation

GitHub Actions / build-ubuntu (20.x, g++-12)

Unexpected space before function parentheses

Check warning on line 270 in modules/rntuple.mjs

View workflow job for this annotation

GitHub Actions / build-ubuntu (20.x, g++-13)

Unexpected space before function parentheses

Check warning on line 270 in modules/rntuple.mjs

View workflow job for this annotation

GitHub Actions / build-ubuntu (20.x, g++-14)

Unexpected space before function parentheses

Check warning on line 270 in modules/rntuple.mjs

View workflow job for this annotation

GitHub Actions / build-ubuntu (18.x, g++-12)

Unexpected space before function parentheses

Check warning on line 270 in modules/rntuple.mjs

View workflow job for this annotation

GitHub Actions / build-ubuntu (22.x, g++-12)

Unexpected space before function parentheses
return (this.flags & 0x01) !== 0;
};

column.isSuppressed = function () {

Check warning on line 274 in modules/rntuple.mjs

View workflow job for this annotation

GitHub Actions / build-windows (18.x)

Unexpected space before function parentheses

Check warning on line 274 in modules/rntuple.mjs

View workflow job for this annotation

GitHub Actions / build-macos (18.x)

Unexpected space before function parentheses

Check warning on line 274 in modules/rntuple.mjs

View workflow job for this annotation

GitHub Actions / build-macos (22.x)

Unexpected space before function parentheses

Check warning on line 274 in modules/rntuple.mjs

View workflow job for this annotation

GitHub Actions / build-windows (20.x)

Unexpected space before function parentheses

Check warning on line 274 in modules/rntuple.mjs

View workflow job for this annotation

GitHub Actions / build-windows (22.x)

Unexpected space before function parentheses

Check warning on line 274 in modules/rntuple.mjs

View workflow job for this annotation

GitHub Actions / build-macos (20.x)

Unexpected space before function parentheses

Check warning on line 274 in modules/rntuple.mjs

View workflow job for this annotation

GitHub Actions / build-ubuntu (22.x, g++-13)

Unexpected space before function parentheses

Check warning on line 274 in modules/rntuple.mjs

View workflow job for this annotation

GitHub Actions / build-ubuntu (18.x, g++-13)

Unexpected space before function parentheses

Check warning on line 274 in modules/rntuple.mjs

View workflow job for this annotation

GitHub Actions / build-ubuntu (18.x, g++-14)

Unexpected space before function parentheses

Check warning on line 274 in modules/rntuple.mjs

View workflow job for this annotation

GitHub Actions / build-ubuntu (22.x, g++-14)

Unexpected space before function parentheses

Check warning on line 274 in modules/rntuple.mjs

View workflow job for this annotation

GitHub Actions / build-ubuntu (20.x, g++-12)

Unexpected space before function parentheses

Check warning on line 274 in modules/rntuple.mjs

View workflow job for this annotation

GitHub Actions / build-ubuntu (20.x, g++-13)

Unexpected space before function parentheses

Check warning on line 274 in modules/rntuple.mjs

View workflow job for this annotation

GitHub Actions / build-ubuntu (20.x, g++-14)

Unexpected space before function parentheses

Check warning on line 274 in modules/rntuple.mjs

View workflow job for this annotation

GitHub Actions / build-ubuntu (18.x, g++-12)

Unexpected space before function parentheses

Check warning on line 274 in modules/rntuple.mjs

View workflow job for this annotation

GitHub Actions / build-ubuntu (22.x, g++-12)

Unexpected space before function parentheses
return this.firstElementIndex !== null && this.firstElementIndex < 0;
};

columnDescriptors.push(column);
}
this.columnDescriptors = columnDescriptors;
}
_readAliasColumn(reader){
this.aliasColumnListSize = reader.readS64(); // signed 64-bit
const aliasListisList = this.aliasColumnListSize < 0;
if (!aliasListisList)
throw new Error('Alias column list frame is not a list frame, which is required.');
const aliasColumnCount = reader.readU32(); // number of alias column entries
console.log('Alias Column List Count:', aliasColumnCount);
const aliasColumns = [];
for (let i = 0; i < aliasColumnCount; ++i){
const physicalColumnId = reader.readU32(),
fieldId = reader.readU32();
aliasColumns.push({
physicalColumnId,
fieldId
});
}
this.aliasColumns = aliasColumns;
}
_readExtraTypeInformation(reader) {
this.extraTypeInfoListSize = reader.readS64(); // signed 64-bit
const isList = this.extraTypeInfoListSize < 0;

if (!isList)
throw new Error('Extra type info frame is not a list frame, which is required.');

const entryCount = reader.readU32(); // number of extra type info entries
console.log('Extra Type Info Count:', entryCount);

const extraTypeInfo = [];
for (let i = 0; i < entryCount; ++i) {
const contentId = reader.readU32(),
typeVersion = reader.readU32();
extraTypeInfo.push({
contentId,
typeVersion
});
}
this.extraTypeInfo = extraTypeInfo;
}
_readClusterGroups(reader){
const clusterGroupListSize = reader.readS64(),
isList = clusterGroupListSize < 0;
if (!isList)
throw new Error('Cluster group frame is not a list frame.');

const clusterGroupCount = reader.readU32();
console.log('Cluster Group Count:', clusterGroupCount);

const clusterGroups = [];
for (let i = 0; i < clusterGroupCount; ++i) {
const minEntry = reader.readS64(),
entrySpan = reader.readS64(),
numClusters = reader.readU32();
clusterGroups.push({
minEntry,
entrySpan,
numClusters
});
}
this.clusterGroups = clusterGroups;
}

}

Expand Down
Loading