diff --git a/demo/node/rntuple.js b/demo/node/rntuple.js
index 5dc470ec2..3b19018f2 100644
--- a/demo/node/rntuple.js
+++ b/demo/node/rntuple.js
@@ -62,3 +62,28 @@ else
    else
       console.error('FAILURE: test 3 - readString does not match');
 }
+if (!rntuple.builder?.fieldDescriptors?.length)
+   console.error('FAILURE: No fields deserialized');
+else {
+   console.log(`OK: ${rntuple.builder.fieldDescriptors.length} field(s) deserialized`);
+   for (let i = 0; i < rntuple.builder.fieldDescriptors.length; ++i) {
+      const field = rntuple.builder.fieldDescriptors[i];
+      if (!field.fieldName || !field.typeName)
+         console.error(`FAILURE: Field ${i} is missing name or type`);
+      else
+         console.log(`OK: Field ${i}: ${field.fieldName} (${field.typeName})`);
+   }
+}
+
+if (!rntuple.builder?.columnDescriptors?.length)
+   console.error('FAILURE: No columns deserialized');
+else {
+   console.log(`OK: ${rntuple.builder.columnDescriptors.length} column(s) deserialized`);
+   for (let i = 0; i < rntuple.builder.columnDescriptors.length; ++i) {
+      const col = rntuple.builder.columnDescriptors[i];
+      if (typeof col.fieldId !== 'number')
+         console.error(`FAILURE: Column ${i} is missing a valid fieldId`);
+      else
+         console.log(`OK: Column ${i}: fieldId = ${col.fieldId}, type = ${col.coltype}`);
+   }
+}
diff --git a/demo/node/rntuple_test.js b/demo/node/rntuple_test.js
index 3712eb199..fc18361dd 100644
--- a/demo/node/rntuple_test.js
+++ b/demo/node/rntuple_test.js
@@ -18,11 +18,6 @@ if (rntuple.builder?.name !== 'Staff')
 else
    console.log('OK: name is', rntuple.builder?.name);
 
-if (typeof rntuple.builder?.version !== 'number')
-   console.error('FAILURE: version is missing or invalid');
-else
-   console.log('OK: version is', rntuple.builder.version);
-
 if (!rntuple.builder?.description)
    console.error('FAILURE: description is missing');
 else
diff --git a/modules/rntuple.mjs b/modules/rntuple.mjs
index d2cde094c..1e6c16187 100644
--- a/modules/rntuple.mjs
+++ b/modules/rntuple.mjs
@@ -70,6 +70,13 @@ class RBufferReader {
         return val;
     }
 
+    // Read 64-bit float (8 BYTES)
+    readF64() {
+        const val = this.view.getFloat64(this.offset, LITTLE_ENDIAN);
+        this.offset += 8;
+        return val;
+    }
+
     // Read a string with 32-bit length prefix
     readString() {
         const length = this.readU32();
@@ -92,54 +99,247 @@ class RBufferReader {
         this.offset += 8;
         return val;
     }
-    
+
 }
 
 class RNTupleDescriptorBuilder {
-
-    deserializeHeader(header_blob) {
-        if (!header_blob) return;
-        const reader = new RBufferReader(header_blob);
+deserializeHeader(header_blob) {
+    if (!header_blob) return;
+
+    const reader = new RBufferReader(header_blob);
+    // Read the envelope metadata
+    this._readEnvelopeMetadata(reader);
+
+    // TODO: Validate the envelope checksum at the end of deserialization
+    // const payloadStart = reader.offset;
+
+    // Read feature flags list (may span multiple 64-bit words)
+    this._readFeatureFlags(reader);
+
+    // Read metadata strings
+    this.name = reader.readString();
+    this.description = reader.readString();
+    this.writer = reader.readString();
+    // TODO: Remove debug logs before finalizing
+    console.log('Name:', this.name);
+    console.log('Description:', this.description);
+    console.log('Writer:', this.writer);
+
+    // List frame: list of field record frames
+    this._readFieldDescriptors(reader);
+    // List frame: list of column record frames
+    this._readColumnDescriptors(reader);
+    // Read alias column descriptors
+    this._readAliasColumn(reader);
+    // Read Extra Type Information
+    this._readExtraTypeInformation(reader);
+}
 
-        // 1. Read header version (4 bytes)
-        this.version = reader.readU32();
 
-        // 2. Read feature flags (4 bytes)
-        this.headerFeatureFlags = reader.readU32();
+deserializeFooter(footer_blob) {
+    if (!footer_blob) return;
 
-        // 3. Read xxhash3 (64-bit, 8 bytes)
-        this.xxhash3 = reader.readU64();
+    const reader = new RBufferReader(footer_blob);
+    // Read the envelope metadata
+    this._readEnvelopeMetadata(reader);
 
-        // 4. Read name (length-prefixed string)
-        this.name = reader.readString();
+    // Feature flag(32 bits)
+    this.footerFeatureFlags = reader.readU32();
+    // Header checksum (64-bit xxhash3)
+    this.headerChecksum = reader.readU64();
 
-        // 5. Read description (length-prefixed string)
-        this.description = reader.readString();
-        
+    // Schema extension record frame (4 list frames inside)
+    this._readFieldDescriptors(reader);
+    this._readColumnDescriptors(reader);
+    this._readAliasColumn(reader); // same helper the header path uses; defined below in this class
+    this._readExtraTypeInformation(reader); // same helper the header path uses; defined below in this class
 
-        // Console output to verify deserialization results
-        console.log('Version:', this.version);
-        console.log('Header Feature Flags:', this.headerFeatureFlags);
-        console.log('xxhash3:', '0x' + this.xxhash3.toString(16).padStart(16, '0'));
-        console.log('Name:', this.name);
-        console.log('Description:', this.description);
+    // Cluster Group record frame
+    this._readClusterGroups(reader);
     }
 
-deserializeFooter(footer_blob) {
-    if (!footer_blob) return;
+_readEnvelopeMetadata(reader) {
+    const typeAndLength = reader.readU64();
 
-    const reader = new RBufferReader(footer_blob);
+    // Envelope metadata
+    // The 16 bits are the envelope type ID, and the 48 bits are the envelope length
+    this.envelopeType = Number(typeAndLength & 0xFFFFn);
+    this.envelopeLength = Number((typeAndLength >> 16n) & 0xFFFFFFFFFFFFn);
 
-    this.footerFeatureFlags = reader.readU32();
-    this.headerChecksum = reader.readU32();
+    console.log('Envelope Type ID:', this.envelopeType);
+    console.log('Envelope Length:', this.envelopeLength);
+}
+_readFeatureFlags(reader) {
+    this.featureFlags = [];
+    while (true) {
+        const val = reader.readU64();
+        this.featureFlags.push(val);
+        if ((val & 0x8000000000000000n) === 0n) break; // MSB not set: end of list
+    }
+
+    // verify all feature flags are zero
+    if (this.featureFlags.some(v => v !== 0n))
+        throw new Error('Unexpected non-zero feature flags: ' + this.featureFlags);
+}
 
-    console.log('Footer Feature Flags:', this.footerFeatureFlags);
-    console.log('Header Checksum:', this.headerChecksum);
+_readFieldDescriptors(reader) {
+const fieldListSize = reader.readS64(), // signed 64-bit
+fieldListIsList = fieldListSize < 0;
+
+
+    if (!fieldListIsList)
+        throw new Error('Field list frame is not a list frame, which is required.');
+
+    const fieldListCount = reader.readU32(); // number of field entries
+    console.log('Field List Count:', fieldListCount);
+
+    // List frame: list of field record frames
+
+    const fieldDescriptors = [];
+    for (let i = 0; i < fieldListCount; ++i) {
+        const fieldVersion = reader.readU32(),
+            typeVersion = reader.readU32(),
+            parentFieldId = reader.readU32(),
+            structRole = reader.readU16(),
+            flags = reader.readU16(),
+
+            fieldName = reader.readString(),
+            typeName = reader.readString(),
+            typeAlias = reader.readString(),
+            description = reader.readString();
+        let arraySize = null, sourceFieldId = null, checksum = null;
+
+        if (flags & 0x1) arraySize = reader.readU32();
+        if (flags & 0x2) sourceFieldId = reader.readU32();
+        if (flags & 0x4) checksum = reader.readU32();
+
+        fieldDescriptors.push({
+            fieldVersion,
+            typeVersion,
+            parentFieldId,
+            structRole,
+            flags,
+            fieldName,
+            typeName,
+            typeAlias,
+            description,
+            arraySize,
+            sourceFieldId,
+            checksum
+        });
+        console.log(`Field ${i + 1}:`, fieldName, '&&', typeName);
+}
+    this.fieldDescriptors = fieldDescriptors;
+}
+_readColumnDescriptors(reader) {
+    this.columnListSize = reader.readS64(); // signed 64-bit
+    const columnListIsList = this.columnListSize < 0;
+    if (!columnListIsList)
+        throw new Error('Column list frame is not a list frame, which is required.');
+    const columnListCount = reader.readU32(); // number of column entries
+    console.log('Column List Count:', columnListCount);
+    const columnDescriptors = [];
+    for (let i = 0; i < columnListCount; ++i) {
+        const coltype = reader.readU16(),
+            bitsOnStrorage = reader.readU16(),
+            fieldId = reader.readU32(),
+            flags = reader.readU16(),
+            representationIndex = reader.readU16();
+
+        let firstElementIndex = null, minValue = null, maxValue = null;
+        if (flags & 0x1) firstElementIndex = reader.readS64(); // signed read: isSuppressed() below tests for a negative index
+        if (flags & 0x2){
+            minValue = reader.readF64();
+            maxValue = reader.readF64();
 }
+        const column = {
+            coltype,
+            bitsOnStrorage,
+            fieldId,
+            flags,
+            representationIndex,
+            firstElementIndex,
+            minValue,
+            maxValue
+        };
+
+        column.isDeferred = function () {
+            return (this.flags & 0x01) !== 0;
+        };
+
+        column.isSuppressed = function () {
+            return this.firstElementIndex !== null && this.firstElementIndex < 0;
+        };
+
+        columnDescriptors.push(column);
+    }
+    this.columnDescriptors = columnDescriptors;
+}
+_readAliasColumn(reader){
+    this.aliasColumnListSize = reader.readS64(); // signed 64-bit
+    const aliasListisList = this.aliasColumnListSize < 0;
+    if (!aliasListisList)
+        throw new Error('Alias column list frame is not a list frame, which is required.');
+    const aliasColumnCount = reader.readU32(); // number of alias column entries
+    console.log('Alias Column List Count:', aliasColumnCount);
+    const aliasColumns = [];
+    for (let i = 0; i < aliasColumnCount; ++i){
+        const physicalColumnId = reader.readU32(),
+            fieldId = reader.readU32();
+        aliasColumns.push({
+            physicalColumnId,
+            fieldId
+        });
+    }
+    this.aliasColumns = aliasColumns;
+}
+_readExtraTypeInformation(reader) {
+    this.extraTypeInfoListSize = reader.readS64(); // signed 64-bit
+    const isList = this.extraTypeInfoListSize < 0;
+
+    if (!isList)
+        throw new Error('Extra type info frame is not a list frame, which is required.');
+
+    const entryCount = reader.readU32(); // number of extra type info entries
+    console.log('Extra Type Info Count:', entryCount);
+
+    const extraTypeInfo = [];
+    for (let i = 0; i < entryCount; ++i) {
+        const contentId = reader.readU32(),
+            typeVersion = reader.readU32();
+        extraTypeInfo.push({
+            contentId,
+            typeVersion
+        });
+    }
+    this.extraTypeInfo = extraTypeInfo;
+}
+_readClusterGroups(reader){
+    const clusterGroupListSize = reader.readS64(),
+        isList = clusterGroupListSize < 0;
+    if (!isList)
+        throw new Error('Cluster group frame is not a list frame.');
+
+    const clusterGroupCount = reader.readU32();
+    console.log('Cluster Group Count:', clusterGroupCount);
+
+    const clusterGroups = [];
+    for (let i = 0; i < clusterGroupCount; ++i) {
+        const minEntry = reader.readS64(),
+            entrySpan = reader.readS64(),
+            numClusters = reader.readU32();
+        clusterGroups.push({
+            minEntry,
+            entrySpan,
+            numClusters
+        });
+    }
+    this.clusterGroups = clusterGroups;
+}
 
 }