From d536442f66b15e70b9a629914fd2904db9a91d5d Mon Sep 17 00:00:00 2001 From: Marco Scalzo Date: Mon, 22 Jan 2024 14:20:11 +0100 Subject: [PATCH 01/20] add an iceberg table loader --- .../api/deltasharing/SampleTables.java | 39 +++++- server/core/build.gradle.kts | 6 + .../io/whitefox/core/MetastoreProperties.java | 37 ++++++ .../java/io/whitefox/core/MetastoreType.java | 3 +- .../core/services/AbstractSharedTable.java | 15 +++ .../core/services/DeltaShareTableLoader.java | 2 +- .../core/services/DeltaSharedTable.java | 2 +- .../core/services/DeltaSharesServiceImpl.java | 24 ++-- .../core/services/IcebergSharedTable.java | 121 ++++++++++++++++++ .../core/services/IcebergTableLoader.java | 13 ++ .../whitefox/core/services/TableLoader.java | 4 +- .../core/services/TableLoaderFactory.java | 8 ++ .../core/services/TableLoaderFactoryImpl.java | 17 +++ .../core/services/TableSchemaConverter.java | 60 +++++++++ .../core/services/DeltaShareServiceTest.java | 54 ++++---- .../services/IcebergCatalogServiceTest.java | 69 ++++++++++ .../core/services/IcebergTableLoaderTest.java | 39 ++++++ .../core/services/ShareServiceTest.java | 2 +- .../java/io/whitefox/DeltaTestUtils.java | 58 +-------- .../java/io/whitefox/IcebergTestUtils.java | 50 ++++++++ .../java/io/whitefox/TestUtils.java | 51 ++++++++ 21 files changed, 576 insertions(+), 98 deletions(-) create mode 100644 server/core/src/main/java/io/whitefox/core/services/AbstractSharedTable.java create mode 100644 server/core/src/main/java/io/whitefox/core/services/IcebergSharedTable.java create mode 100644 server/core/src/main/java/io/whitefox/core/services/IcebergTableLoader.java create mode 100644 server/core/src/main/java/io/whitefox/core/services/TableLoaderFactory.java create mode 100644 server/core/src/main/java/io/whitefox/core/services/TableLoaderFactoryImpl.java create mode 100644 server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java create mode 100644 
server/core/src/test/java/io/whitefox/core/services/IcebergTableLoaderTest.java create mode 100644 server/core/src/testFixtures/java/io/whitefox/IcebergTestUtils.java create mode 100644 server/core/src/testFixtures/java/io/whitefox/TestUtils.java diff --git a/server/app/src/test/java/io/whitefox/api/deltasharing/SampleTables.java b/server/app/src/test/java/io/whitefox/api/deltasharing/SampleTables.java index 777cc5523..4a12bc5ed 100644 --- a/server/app/src/test/java/io/whitefox/api/deltasharing/SampleTables.java +++ b/server/app/src/test/java/io/whitefox/api/deltasharing/SampleTables.java @@ -6,13 +6,12 @@ import io.whitefox.api.deltasharing.model.FileObjectFileWithoutPresignedUrl; import io.whitefox.api.deltasharing.model.FileObjectWithoutPresignedUrl; import io.whitefox.api.deltasharing.model.v1.generated.*; -import io.whitefox.core.InternalTable; -import io.whitefox.core.Principal; -import io.whitefox.core.SharedTable; +import io.whitefox.core.*; import io.whitefox.persistence.StorageManager; import io.whitefox.persistence.memory.InMemoryStorageManager; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; public class SampleTables { @@ -35,6 +34,40 @@ public static InternalTable s3DeltaTableWithHistory1(S3TestConfig s3TestConfig) public static final InternalTable deltaTableWithHistory1 = deltaTable("delta-table-with-history"); + public static InternalTable s3DeltaTable(String s3TableName, S3TestConfig s3TestConfig) { + var mrFoxPrincipal = new Principal("Mr. 
Fox"); + return new InternalTable( + s3TableName, + Optional.empty(), + new InternalTable.DeltaTableProperties(s3DeltaTableUri(s3TableName)), + Optional.of(0L), + 0L, + mrFoxPrincipal, + 0L, + mrFoxPrincipal, + getProvider(getS3Storage(mrFoxPrincipal, s3TestConfig), mrFoxPrincipal, Optional.empty())); + } + + public static Storage getS3Storage(Principal principal, S3TestConfig s3TestConfig) { + return new Storage( + "storage", + Optional.empty(), + principal, + StorageType.S3, + Optional.empty(), + "uri", + 0L, + principal, + 0L, + principal, + new StorageProperties.S3Properties(new AwsCredentials.SimpleAwsCredentials( + s3TestConfig.accessKey(), s3TestConfig.secretKey(), s3TestConfig.region()))); + } + + public static String s3DeltaTableUri(String s3TableName) { + return String.format("s3a://whitefox-s3-test-bucket/delta/samples/%s", s3TableName); + } + public static StorageManager createStorageManager() { return new InMemoryStorageManager(List.of(new io.whitefox.core.Share( "name", diff --git a/server/core/build.gradle.kts b/server/core/build.gradle.kts index e735e22cd..d1a194bc1 100644 --- a/server/core/build.gradle.kts +++ b/server/core/build.gradle.kts @@ -30,6 +30,12 @@ dependencies { implementation("io.delta:delta-standalone_2.13:3.0.0") implementation(String.format("org.apache.hadoop:hadoop-common:%s", hadoopVersion)) + //ICEBERG + implementation("org.apache.iceberg:iceberg-api:1.4.3") + implementation("org.apache.iceberg:iceberg-core:1.4.3") + implementation("org.apache.iceberg:iceberg-aws:1.4.3") + compileOnly("software.amazon.awssdk:glue:2.22.10") + //AWS compileOnly(String.format("com.amazonaws:aws-java-sdk-bom:%s", awsSdkVersion)) compileOnly(String.format("com.amazonaws:aws-java-sdk-s3:%s", awsSdkVersion)) diff --git a/server/core/src/main/java/io/whitefox/core/MetastoreProperties.java b/server/core/src/main/java/io/whitefox/core/MetastoreProperties.java index 7536260ca..425befaf5 100644 --- 
a/server/core/src/main/java/io/whitefox/core/MetastoreProperties.java +++ b/server/core/src/main/java/io/whitefox/core/MetastoreProperties.java @@ -51,4 +51,41 @@ public String toString() { + credentials + ']'; } } + + final class HadoopMetastoreProperties implements MetastoreProperties { + private final String location; + + public HadoopMetastoreProperties(String location, MetastoreType type) { + if (type != MetastoreType.HADOOP) { + throw new IllegalArgumentException(String.format( + "Hadoop metastore properties are not compatible with metastore of type %s", type)); + } + this.location = location; + } + + public String location() { + return location; + } + + @Override + @SkipCoverageGenerated + public boolean equals(Object obj) { + if (obj == this) return true; + if (obj == null || obj.getClass() != this.getClass()) return false; + var that = (HadoopMetastoreProperties) obj; + return Objects.equals(this.location, that.location); + } + + @Override + @SkipCoverageGenerated + public int hashCode() { + return Objects.hash(location); + } + + @Override + @SkipCoverageGenerated + public String toString() { + return "HadoopMetastoreProperties[" + "location=" + location + ']'; + } + } } diff --git a/server/core/src/main/java/io/whitefox/core/MetastoreType.java b/server/core/src/main/java/io/whitefox/core/MetastoreType.java index 5e89ff1ff..2bf9816d9 100644 --- a/server/core/src/main/java/io/whitefox/core/MetastoreType.java +++ b/server/core/src/main/java/io/whitefox/core/MetastoreType.java @@ -4,7 +4,8 @@ import java.util.Optional; public enum MetastoreType { - GLUE("glue"); + GLUE("glue"), + HADOOP("hadoop"); public final String value; diff --git a/server/core/src/main/java/io/whitefox/core/services/AbstractSharedTable.java b/server/core/src/main/java/io/whitefox/core/services/AbstractSharedTable.java new file mode 100644 index 000000000..2c456576b --- /dev/null +++ b/server/core/src/main/java/io/whitefox/core/services/AbstractSharedTable.java @@ -0,0 +1,15 @@ +package 
io.whitefox.core.services; + +import io.whitefox.core.Metadata; +import io.whitefox.core.ReadTableRequest; +import io.whitefox.core.ReadTableResultToBeSigned; +import java.util.Optional; + +public interface AbstractSharedTable { + + Optional getMetadata(Optional startingTimestamp); + + Optional getTableVersion(Optional startingTimestamp); + + ReadTableResultToBeSigned queryTable(ReadTableRequest readTableRequest); +} diff --git a/server/core/src/main/java/io/whitefox/core/services/DeltaShareTableLoader.java b/server/core/src/main/java/io/whitefox/core/services/DeltaShareTableLoader.java index 0e8a90c1d..baccc1a26 100644 --- a/server/core/src/main/java/io/whitefox/core/services/DeltaShareTableLoader.java +++ b/server/core/src/main/java/io/whitefox/core/services/DeltaShareTableLoader.java @@ -4,7 +4,7 @@ import jakarta.enterprise.context.ApplicationScoped; @ApplicationScoped -public class DeltaShareTableLoader implements TableLoader { +public class DeltaShareTableLoader implements TableLoader { @Override public DeltaSharedTable loadTable(SharedTable sharedTable) { diff --git a/server/core/src/main/java/io/whitefox/core/services/DeltaSharedTable.java b/server/core/src/main/java/io/whitefox/core/services/DeltaSharedTable.java index 69fff6530..107eb9e92 100644 --- a/server/core/src/main/java/io/whitefox/core/services/DeltaSharedTable.java +++ b/server/core/src/main/java/io/whitefox/core/services/DeltaSharedTable.java @@ -9,7 +9,7 @@ import java.util.Optional; import java.util.stream.Collectors; -public class DeltaSharedTable { +public class DeltaSharedTable implements AbstractSharedTable { private final DeltaLog deltaLog; private final TableSchemaConverter tableSchemaConverter; diff --git a/server/core/src/main/java/io/whitefox/core/services/DeltaSharesServiceImpl.java b/server/core/src/main/java/io/whitefox/core/services/DeltaSharesServiceImpl.java index 23bde9a77..a2a64338a 100644 --- a/server/core/src/main/java/io/whitefox/core/services/DeltaSharesServiceImpl.java 
+++ b/server/core/src/main/java/io/whitefox/core/services/DeltaSharesServiceImpl.java @@ -15,8 +15,7 @@ public class DeltaSharesServiceImpl implements DeltaSharesService { private final StorageManager storageManager; private final Integer defaultMaxResults; - private final DeltaShareTableLoader tableLoader; - + private final TableLoaderFactory tableLoaderFactory; private final FileSignerFactory fileSignerFactory; @Inject @@ -24,11 +23,11 @@ public DeltaSharesServiceImpl( StorageManager storageManager, @ConfigProperty(name = "io.delta.sharing.api.server.defaultMaxResults") Integer defaultMaxResults, - DeltaShareTableLoader tableLoader, + TableLoaderFactory tableLoaderFactory, FileSignerFactory signerFactory) { this.storageManager = storageManager; this.defaultMaxResults = defaultMaxResults; - this.tableLoader = tableLoader; + this.tableLoaderFactory = tableLoaderFactory; this.fileSignerFactory = signerFactory; } @@ -37,7 +36,10 @@ public Optional getTableVersion( String share, String schema, String table, String startingTimestamp) { return storageManager .getSharedTable(share, schema, table) - .map(t -> tableLoader.loadTable(t).getTableVersion(Optional.ofNullable(startingTimestamp))) + .map(t -> tableLoaderFactory + .newTableLoader(t.internalTable()) + .loadTable(t) + .getTableVersion(Optional.ofNullable(startingTimestamp))) .orElse(Optional.empty()); } @@ -59,9 +61,10 @@ public ContentAndToken> listShares( @Override public Optional getTableMetadata( String share, String schema, String table, String startingTimestamp) { - return storageManager - .getSharedTable(share, schema, table) - .flatMap(t -> tableLoader.loadTable(t).getMetadata(Optional.ofNullable(startingTimestamp))); + return storageManager.getSharedTable(share, schema, table).flatMap(t -> tableLoaderFactory + .newTableLoader(t.internalTable()) + .loadTable(t) + .getMetadata(Optional.ofNullable(startingTimestamp))); } @Override @@ -128,7 +131,10 @@ public ReadTableResult queryTable( var fileSigner = 
fileSignerFactory.newFileSigner(sharedTable.internalTable().provider().storage()); - var readTableResultToBeSigned = tableLoader.loadTable(sharedTable).queryTable(queryRequest); + var readTableResultToBeSigned = tableLoaderFactory + .newTableLoader(sharedTable.internalTable()) + .loadTable(sharedTable) + .queryTable(queryRequest); return new ReadTableResult( readTableResultToBeSigned.protocol(), readTableResultToBeSigned.metadata(), diff --git a/server/core/src/main/java/io/whitefox/core/services/IcebergSharedTable.java b/server/core/src/main/java/io/whitefox/core/services/IcebergSharedTable.java new file mode 100644 index 000000000..3e28d6c6f --- /dev/null +++ b/server/core/src/main/java/io/whitefox/core/services/IcebergSharedTable.java @@ -0,0 +1,121 @@ +package io.whitefox.core.services; + +import io.whitefox.core.*; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; +import org.apache.commons.lang3.NotImplementedException; +import org.apache.hadoop.conf.Configuration; +import org.apache.iceberg.BaseMetastoreCatalog; +import org.apache.iceberg.CatalogProperties; +import org.apache.iceberg.Table; +import org.apache.iceberg.aws.glue.GlueCatalog; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.hadoop.HadoopCatalog; + +public class IcebergSharedTable implements AbstractSharedTable { + + private final Table icebergTable; + private final TableSchemaConverter tableSchemaConverter; + + private IcebergSharedTable(Table icebergTable, TableSchemaConverter tableSchemaConverter) { + this.icebergTable = icebergTable; + this.tableSchemaConverter = tableSchemaConverter; + } + + public static IcebergSharedTable of( + SharedTable tableDetails, + TableSchemaConverter tableSchemaConverter, + HadoopConfigBuilder hadoopConfigBuilder) { + + if (tableDetails.internalTable().properties() instanceof InternalTable.IcebergTableProperties) { + var metastore = getMetastore(tableDetails.internalTable()); + 
var catalog = newCatalog( + metastore, + hadoopConfigBuilder, + tableDetails.internalTable().provider().storage()); + var tableId = getTableIdentifier(tableDetails.internalTable()); + try { + return new IcebergSharedTable(catalog.loadTable(tableId), tableSchemaConverter); + } catch (Exception e) { + throw new IllegalArgumentException(String.format( + "Cannot find iceberg table [%s] under namespace [%s]", + tableId.name(), tableId.namespace()), e); + } + } else { + throw new IllegalArgumentException( + String.format("%s is not an iceberg table", tableDetails.name())); + } + } + + private static TableIdentifier getTableIdentifier(InternalTable internalTable) { + var icebergTableProperties = + ((InternalTable.IcebergTableProperties) internalTable.properties()); + return TableIdentifier.of( + icebergTableProperties.databaseName(), icebergTableProperties.tableName()); + } + + private static Metastore getMetastore(InternalTable internalTable) { + return internalTable + .provider() + .metastore() + .orElseThrow(() -> new RuntimeException( + String.format("missing metastore for the iceberg table: [%s]", internalTable.name()))); + } + + private static BaseMetastoreCatalog newCatalog( + Metastore metastore, HadoopConfigBuilder hadoopConfigBuilder, Storage storage) { + if (metastore.type() == MetastoreType.GLUE) { + try (var catalog = new GlueCatalog()) { + Configuration conf = hadoopConfigBuilder.buildConfig(storage); + catalog.setConf(conf); + catalog.initialize(metastore.name(), setGlueProperties()); + return catalog; + } catch (IOException e) { + throw new RuntimeException("Unexpected exception when initializing the glue catalog", e); + } + } else if (metastore.type() == MetastoreType.HADOOP) { + try (var catalog = new HadoopCatalog()) { + Configuration conf = hadoopConfigBuilder.buildConfig(storage); + catalog.setConf(conf); + catalog.initialize( + metastore.name(), + Map.of( + "warehouse", + ((MetastoreProperties.HadoopMetastoreProperties) metastore.properties()) + 
.location())); + return catalog; + } catch (IOException e) { + throw new RuntimeException("Unexpected exception when initializing the hadoop catalog", e); + } + } else { + throw new RuntimeException(String.format("Unknown metastore type: [%s]", metastore.type())); + } + } + + public static IcebergSharedTable of(SharedTable sharedTable) { + return of(sharedTable, new TableSchemaConverter(), new HadoopConfigBuilder()); + } + + public Optional getMetadata(Optional startingTimestamp) { + throw new NotImplementedException(); + } + + @Override + public Optional getTableVersion(Optional startingTimestamp) { + throw new NotImplementedException(); + } + + @Override + public ReadTableResultToBeSigned queryTable(ReadTableRequest readTableRequest) { + throw new NotImplementedException(); + } + + private static Map setGlueProperties() { + Map properties = new HashMap<>(); + properties.put(CatalogProperties.CATALOG_IMPL, "org.apache.iceberg.aws.glue.GlueCatalog"); + properties.put(CatalogProperties.FILE_IO_IMPL, "org.apache.iceberg.aws.s3.S3FileIO"); + return properties; + } +} diff --git a/server/core/src/main/java/io/whitefox/core/services/IcebergTableLoader.java b/server/core/src/main/java/io/whitefox/core/services/IcebergTableLoader.java new file mode 100644 index 000000000..535fcfdcf --- /dev/null +++ b/server/core/src/main/java/io/whitefox/core/services/IcebergTableLoader.java @@ -0,0 +1,13 @@ +package io.whitefox.core.services; + +import io.whitefox.core.SharedTable; +import jakarta.enterprise.context.ApplicationScoped; + +@ApplicationScoped +public class IcebergTableLoader implements TableLoader { + + @Override + public IcebergSharedTable loadTable(SharedTable sharedTable) { + return IcebergSharedTable.of(sharedTable); + } +} diff --git a/server/core/src/main/java/io/whitefox/core/services/TableLoader.java b/server/core/src/main/java/io/whitefox/core/services/TableLoader.java index 071e19a58..b7efad80d 100644 --- 
a/server/core/src/main/java/io/whitefox/core/services/TableLoader.java +++ b/server/core/src/main/java/io/whitefox/core/services/TableLoader.java @@ -2,7 +2,7 @@ import io.whitefox.core.SharedTable; -public interface TableLoader { +public interface TableLoader { - DeltaSharedTable loadTable(SharedTable sharedTable); + AbstractSharedTable loadTable(SharedTable sharedTable); } diff --git a/server/core/src/main/java/io/whitefox/core/services/TableLoaderFactory.java b/server/core/src/main/java/io/whitefox/core/services/TableLoaderFactory.java new file mode 100644 index 000000000..3eaba9d25 --- /dev/null +++ b/server/core/src/main/java/io/whitefox/core/services/TableLoaderFactory.java @@ -0,0 +1,8 @@ +package io.whitefox.core.services; + +import io.whitefox.core.InternalTable; + +public interface TableLoaderFactory { + + TableLoader newTableLoader(InternalTable internalTable); +} diff --git a/server/core/src/main/java/io/whitefox/core/services/TableLoaderFactoryImpl.java b/server/core/src/main/java/io/whitefox/core/services/TableLoaderFactoryImpl.java new file mode 100644 index 000000000..92daef652 --- /dev/null +++ b/server/core/src/main/java/io/whitefox/core/services/TableLoaderFactoryImpl.java @@ -0,0 +1,17 @@ +package io.whitefox.core.services; + +import io.whitefox.core.InternalTable; +import jakarta.enterprise.context.ApplicationScoped; + +@ApplicationScoped +public class TableLoaderFactoryImpl implements TableLoaderFactory { + + @Override + public TableLoader newTableLoader(InternalTable internalTable) { + if (internalTable.properties() instanceof InternalTable.DeltaTableProperties) { + return new DeltaShareTableLoader(); + } else if (internalTable.properties() instanceof InternalTable.IcebergTableProperties) { + return new IcebergTableLoader(); + } else throw new RuntimeException(String.format("unknown table [%s]", internalTable.name())); + } +} diff --git a/server/core/src/main/java/io/whitefox/core/services/TableSchemaConverter.java 
b/server/core/src/main/java/io/whitefox/core/services/TableSchemaConverter.java index b990ac017..5d4adde6a 100644 --- a/server/core/src/main/java/io/whitefox/core/services/TableSchemaConverter.java +++ b/server/core/src/main/java/io/whitefox/core/services/TableSchemaConverter.java @@ -4,6 +4,8 @@ import java.util.Map; import java.util.Objects; import java.util.stream.Collectors; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; public class TableSchemaConverter { @@ -27,6 +29,19 @@ public StructType convertDeltaSchemaToWhitefox(io.delta.standalone.types.StructT return structType; } + public StructType convertIcebergSchemaToWhitefox(Types.StructType st) { + var fields = st.fields(); + var structType = new StructType(); + for (Types.NestedField field : fields) { + var name = field.name(); + var dataType = field.type(); + var nullable = field.isOptional(); + structType = structType.add( + new StructField(name, convertIcebergDataTypeToWhitefox(dataType), nullable, Map.of())); + } + return structType; + } + public DataType convertDeltaDataTypeToWhitefox(io.delta.standalone.types.DataType st) { if (st instanceof io.delta.standalone.types.BooleanType) { return BooleanType.BOOLEAN; @@ -70,4 +85,49 @@ public DataType convertDeltaDataTypeToWhitefox(io.delta.standalone.types.DataTyp throw new IllegalArgumentException("Unknown type: " + st); } } + + public DataType convertIcebergDataTypeToWhitefox(org.apache.iceberg.types.Type icebergType) { + if (icebergType.isPrimitiveType()) { + return convertIcebergPrimitiveTypeToWhitefox(icebergType.asPrimitiveType()); + } else if (icebergType.isListType()) { + return new ArrayType( + convertIcebergDataTypeToWhitefox(icebergType.asListType().elementType()), + icebergType.asListType().isElementOptional()); + } else if (icebergType.isMapType()) { + return new io.whitefox.core.types.MapType( + convertIcebergDataTypeToWhitefox(icebergType.asMapType().keyType()), + 
convertIcebergDataTypeToWhitefox(icebergType.asMapType().valueType()), + icebergType.asMapType().isValueOptional()); + } else if (icebergType.isStructType()) { + return convertIcebergSchemaToWhitefox(icebergType.asStructType()); + } else { + throw new IllegalArgumentException("Unknown type: " + icebergType); + } + } + + private DataType convertIcebergPrimitiveTypeToWhitefox(Type.PrimitiveType primitiveType) { + if (primitiveType instanceof Types.BooleanType) { + return BooleanType.BOOLEAN; + } else if (primitiveType instanceof Types.IntegerType) { + return IntegerType.INTEGER; + } else if (primitiveType instanceof Types.LongType) { + return LongType.LONG; + } else if (primitiveType instanceof Types.FloatType) { + return FloatType.FLOAT; + } else if (primitiveType instanceof Types.DoubleType) { + return DoubleType.DOUBLE; + } else if (primitiveType instanceof Types.StringType) { + return StringType.STRING; + } else if (primitiveType instanceof Types.BinaryType) { + return BinaryType.BINARY; + } else if (primitiveType instanceof Types.DateType) { + return DateType.DATE; + } else if (primitiveType instanceof Types.TimestampType) { + return TimestampType.TIMESTAMP; + } else if (primitiveType instanceof Types.DecimalType) { + return new io.whitefox.core.types.DecimalType( + ((Types.DecimalType) primitiveType).precision(), + ((Types.DecimalType) primitiveType).scale()); + } else throw new RuntimeException(String.format("unknown primitive type: [%s]", primitiveType)); + } } diff --git a/server/core/src/test/java/io/whitefox/core/services/DeltaShareServiceTest.java b/server/core/src/test/java/io/whitefox/core/services/DeltaShareServiceTest.java index af39f1a9b..bf9778798 100644 --- a/server/core/src/test/java/io/whitefox/core/services/DeltaShareServiceTest.java +++ b/server/core/src/test/java/io/whitefox/core/services/DeltaShareServiceTest.java @@ -1,14 +1,16 @@ package io.whitefox.core.services; import io.whitefox.DeltaTestUtils; -import io.whitefox.core.*; +import 
io.whitefox.core.Principal; +import io.whitefox.core.Schema; +import io.whitefox.core.Share; +import io.whitefox.core.SharedTable; import io.whitefox.persistence.StorageManager; import io.whitefox.persistence.memory.InMemoryStorageManager; import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Optional; -import java.util.concurrent.ExecutionException; import org.hamcrest.Matchers; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -16,7 +18,7 @@ import org.junit.jupiter.api.condition.OS; public class DeltaShareServiceTest { - DeltaShareTableLoader loader = new DeltaShareTableLoader(); + TableLoaderFactory tableLoaderFactory = new TableLoaderFactoryImpl(); Integer defaultMaxResults = 10; FileSignerFactory fileSignerFactory = new FileSignerFactoryImpl(new S3ClientFactoryImpl()); @@ -27,33 +29,33 @@ private static Share createShare(String name, String key, Map sc } @Test - public void listShares() throws ExecutionException, InterruptedException { + public void listShares() { var shares = List.of(createShare("name", "key", Collections.emptyMap())); StorageManager storageManager = new InMemoryStorageManager(shares); - DeltaSharesService deltaSharesService = - new DeltaSharesServiceImpl(storageManager, defaultMaxResults, loader, fileSignerFactory); + DeltaSharesService deltaSharesService = new DeltaSharesServiceImpl( + storageManager, defaultMaxResults, tableLoaderFactory, fileSignerFactory); var sharesWithNextToken = deltaSharesService.listShares(Optional.empty(), Optional.of(30)); Assertions.assertEquals(1, sharesWithNextToken.getContent().size()); Assertions.assertTrue(sharesWithNextToken.getToken().isEmpty()); } @Test - public void listSharesWithToken() throws ExecutionException, InterruptedException { + public void listSharesWithToken() { var shares = List.of(createShare("name", "key", Collections.emptyMap())); StorageManager storageManager = new InMemoryStorageManager(shares); - DeltaSharesService 
deltaSharesService = - new DeltaSharesServiceImpl(storageManager, defaultMaxResults, loader, fileSignerFactory); + DeltaSharesService deltaSharesService = new DeltaSharesServiceImpl( + storageManager, defaultMaxResults, tableLoaderFactory, fileSignerFactory); var sharesWithNextToken = deltaSharesService.listShares(Optional.empty(), Optional.of(30)); Assertions.assertEquals(1, sharesWithNextToken.getContent().size()); Assertions.assertTrue(sharesWithNextToken.getToken().isEmpty()); } @Test - public void listSchemasOfEmptyShare() throws ExecutionException, InterruptedException { + public void listSchemasOfEmptyShare() { var shares = List.of(createShare("name", "key", Collections.emptyMap())); StorageManager storageManager = new InMemoryStorageManager(shares); DeltaSharesService deltaSharesService = - new DeltaSharesServiceImpl(storageManager, 100, loader, fileSignerFactory); + new DeltaSharesServiceImpl(storageManager, 100, tableLoaderFactory, fileSignerFactory); var resultSchemas = deltaSharesService.listSchemas("name", Optional.empty(), Optional.empty()); Assertions.assertTrue(resultSchemas.isPresent()); Assertions.assertTrue(resultSchemas.get().getContent().isEmpty()); @@ -61,12 +63,12 @@ public void listSchemasOfEmptyShare() throws ExecutionException, InterruptedExce } @Test - public void listSchemas() throws ExecutionException, InterruptedException { + public void listSchemas() { var shares = List.of(createShare( "name", "key", Map.of("default", new Schema("default", Collections.emptyList(), "name")))); StorageManager storageManager = new InMemoryStorageManager(shares); DeltaSharesService deltaSharesService = - new DeltaSharesServiceImpl(storageManager, 100, loader, fileSignerFactory); + new DeltaSharesServiceImpl(storageManager, 100, tableLoaderFactory, fileSignerFactory); var resultSchemas = deltaSharesService.listSchemas("name", Optional.empty(), Optional.empty()); Assertions.assertTrue(resultSchemas.isPresent()); Assertions.assertEquals(1, 
resultSchemas.get().getContent().size()); @@ -77,19 +79,19 @@ public void listSchemas() throws ExecutionException, InterruptedException { } @Test - public void listSchemasOfUnknownShare() throws ExecutionException, InterruptedException { + public void listSchemasOfUnknownShare() { var shares = List.of(createShare( "name", "key", Map.of("default", new Schema("default", Collections.emptyList(), "name")))); StorageManager storageManager = new InMemoryStorageManager(shares); DeltaSharesService deltaSharesService = - new DeltaSharesServiceImpl(storageManager, 100, loader, fileSignerFactory); + new DeltaSharesServiceImpl(storageManager, 100, tableLoaderFactory, fileSignerFactory); var resultSchemas = deltaSharesService.listSchemas("notKey", Optional.empty(), Optional.empty()); Assertions.assertTrue(resultSchemas.isEmpty()); } @Test - public void listTables() throws ExecutionException, InterruptedException { + public void listTables() { var shares = List.of(createShare( "name", "key", @@ -102,7 +104,7 @@ public void listTables() throws ExecutionException, InterruptedException { "name")))); StorageManager storageManager = new InMemoryStorageManager(shares); DeltaSharesService deltaSharesService = - new DeltaSharesServiceImpl(storageManager, 100, loader, fileSignerFactory); + new DeltaSharesServiceImpl(storageManager, 100, tableLoaderFactory, fileSignerFactory); var resultSchemas = deltaSharesService.listTables("name", "default", Optional.empty(), Optional.empty()); Assertions.assertTrue(resultSchemas.isPresent()); @@ -114,7 +116,7 @@ public void listTables() throws ExecutionException, InterruptedException { } @Test - public void listAllTables() throws ExecutionException, InterruptedException { + public void listAllTables() { var shares = List.of(createShare( "name", "key", @@ -133,7 +135,7 @@ public void listAllTables() throws ExecutionException, InterruptedException { "name")))); StorageManager storageManager = new InMemoryStorageManager(shares); DeltaSharesService 
deltaSharesService = - new DeltaSharesServiceImpl(storageManager, 100, loader, fileSignerFactory); + new DeltaSharesServiceImpl(storageManager, 100, tableLoaderFactory, fileSignerFactory); var resultSchemas = deltaSharesService.listTablesOfShare("name", Optional.empty(), Optional.empty()); Assertions.assertTrue(resultSchemas.isPresent()); @@ -145,7 +147,7 @@ public void listAllTables() throws ExecutionException, InterruptedException { } @Test - public void listAllTablesEmpty() throws ExecutionException, InterruptedException { + public void listAllTablesEmpty() { var shares = List.of( createShare( "name", @@ -166,7 +168,7 @@ public void listAllTablesEmpty() throws ExecutionException, InterruptedException createShare("name2", "key2", Map.of())); StorageManager storageManager = new InMemoryStorageManager(shares); DeltaSharesService deltaSharesService = - new DeltaSharesServiceImpl(storageManager, 100, loader, fileSignerFactory); + new DeltaSharesServiceImpl(storageManager, 100, tableLoaderFactory, fileSignerFactory); var resultSchemas = deltaSharesService.listTablesOfShare("name2", Optional.empty(), Optional.empty()); Assertions.assertTrue(resultSchemas.isPresent()); @@ -175,10 +177,10 @@ public void listAllTablesEmpty() throws ExecutionException, InterruptedException } @Test - public void listAllTablesNoShare() throws ExecutionException, InterruptedException { + public void listAllTablesNoShare() { StorageManager storageManager = new InMemoryStorageManager(); DeltaSharesService deltaSharesService = - new DeltaSharesServiceImpl(storageManager, 100, loader, fileSignerFactory); + new DeltaSharesServiceImpl(storageManager, 100, tableLoaderFactory, fileSignerFactory); var resultSchemas = deltaSharesService.listTablesOfShare("name2", Optional.empty(), Optional.empty()); Assertions.assertTrue(resultSchemas.isEmpty()); @@ -186,7 +188,7 @@ public void listAllTablesNoShare() throws ExecutionException, InterruptedExcepti @Test @DisabledOnOs(OS.WINDOWS) - public void 
getTableMetadata() { + public void getDeltaTableMetadata() { var shares = List.of(createShare( "name", "key", @@ -199,7 +201,7 @@ public void getTableMetadata() { "name")))); StorageManager storageManager = new InMemoryStorageManager(shares); DeltaSharesService deltaSharesService = - new DeltaSharesServiceImpl(storageManager, 100, loader, fileSignerFactory); + new DeltaSharesServiceImpl(storageManager, 100, tableLoaderFactory, fileSignerFactory); var tableMetadata = deltaSharesService.getTableMetadata("name", "default", "table1", null); Assertions.assertTrue(tableMetadata.isPresent()); Assertions.assertEquals( @@ -220,7 +222,7 @@ public void tableMetadataNotFound() { "name")))); StorageManager storageManager = new InMemoryStorageManager(shares); DeltaSharesService deltaSharesService = - new DeltaSharesServiceImpl(storageManager, 100, loader, fileSignerFactory); + new DeltaSharesServiceImpl(storageManager, 100, tableLoaderFactory, fileSignerFactory); var resultTable = deltaSharesService.getTableMetadata("name", "default", "tableNotFound", null); Assertions.assertTrue(resultTable.isEmpty()); } diff --git a/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java b/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java new file mode 100644 index 000000000..3f8d596a5 --- /dev/null +++ b/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java @@ -0,0 +1,69 @@ +package io.whitefox.core.services; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.IOException; +import java.util.Map; +import org.apache.hadoop.conf.Configuration; +import org.apache.iceberg.Table; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.hadoop.HadoopCatalog; +import org.junit.jupiter.api.Test; + +public class IcebergCatalogServiceTest { + + /** + * This is some sample code that you need to run in your spark shell to generate new iceberg tables for new test 
cases: + * To run the spark-shell with delta support execute: + * {{{ + * spark-shell --packages org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.4.2,org.apache.iceberg:iceberg-aws-bundle:1.4.2 \ + * --conf spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkCatalog \ + * --conf spark.sql.catalog.spark_catalog.type=hadoop \ + * --conf spark.sql.catalog.spark_catalog.warehouse=/Users/marco/agilelab_wa/lake-sharing/server/core/src/testFixtures/resources/iceberg/samples/ \ + * --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions + * Take care that the version of iceberg must be compatible with the version of spark and scala you are using + * (i.e. I'm using iceberg 3.5 on scala 2.12 because my local spark-shell is version 3.5.0 using scala 2.12 + * + * First, uou need to create an iceberg table on your local hadoop catalog + * {{{ + * import org.apache.iceberg.catalog.Namespace; + * import org.apache.iceberg.Schema; + * import org.apache.iceberg.catalog.TableIdentifier; + * import org.apache.iceberg.hadoop.HadoopCatalog; + * import java.util.Map; + * import org.apache.hadoop.conf.Configuration; + * + * val catalogProps = Map.of( + * "warehouse", "/Users/marco/agilelab_wa/lake-sharing/server/core/src/testFixtures/resources/iceberg/samples/", + * "io.manifest.cache-enabled", "true"); + * val catalog = new HadoopCatalog(); + * catalog.setConf(new Configuration()); + * catalog.initialize("hadoop", catalogProps); + * catalog.createNamespace(Namespace.of("test_db")); + * val schema = new Schema(org.apache.iceberg.types.Types.NestedField.required(1, "id", org.apache.iceberg.types.Types.LongType.get())) + * catalog.createTable(TableIdentifier.of("test_db", "icebergtable1"), schema) + * }}} + * Then, you can append data on your iceberg table + * {{{ + * val data = spark.range(0, 5) + * data.writeTo("test_db.icebergtable1").append() + * }}} + */ + @Test + void simpleIcebergTest() throws IOException { + Map catalogProps = 
Map.of( + "warehouse", + "/Users/marco/agilelab_wa/lake-sharing/server/core/src/testFixtures/resources/iceberg/samples/", + "io.manifest.cache-enabled", "true"); + try (HadoopCatalog hadoopCatalog = new HadoopCatalog()) { + // Initialize your catalog + hadoopCatalog.setConf(new Configuration()); + hadoopCatalog.initialize("hadoop", catalogProps); + TableIdentifier tableIdentifier = TableIdentifier.of("test_db", "icebergtable1"); + + // Load the Iceberg table + Table table = hadoopCatalog.loadTable(tableIdentifier); + assertEquals("hadoop.test_db.icebergtable1", table.name()); + } + } +} diff --git a/server/core/src/test/java/io/whitefox/core/services/IcebergTableLoaderTest.java b/server/core/src/test/java/io/whitefox/core/services/IcebergTableLoaderTest.java new file mode 100644 index 000000000..3b281b708 --- /dev/null +++ b/server/core/src/test/java/io/whitefox/core/services/IcebergTableLoaderTest.java @@ -0,0 +1,39 @@ +package io.whitefox.core.services; + +import static io.whitefox.IcebergTestUtils.icebergTableWithHadoopCatalog; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import io.whitefox.core.SharedTable; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.DisabledOnOs; +import org.junit.jupiter.api.condition.OS; + +public class IcebergTableLoaderTest { + + private final IcebergTableLoader icebergTableLoader = new IcebergTableLoader(); + + @Test + @DisabledOnOs(OS.WINDOWS) + public void loadTable() { + SharedTable sharedTable = new SharedTable( + "icebergtable1", + "schema", + "share", + icebergTableWithHadoopCatalog("test_db", "icebergtable1")); + assertDoesNotThrow(() -> icebergTableLoader.loadTable(sharedTable)); + // TODO: add asserts here when IcebergSharedTable.getTableVersion has been implemented + // assertTrue(icebergSharedTable.getTableVersion(Optional.empty()).isPresent()); + // assertEquals(0, 
icebergSharedTable.getTableVersion(Optional.empty()).get()); + } + + @Test + public void loadUnknownTable() { + SharedTable sharedTable = new SharedTable( + "not-found", + "schema", + "share", + icebergTableWithHadoopCatalog("not-found-db", "not-found-table")); + assertThrows(IllegalArgumentException.class, () -> icebergTableLoader.loadTable(sharedTable)); + } +} diff --git a/server/core/src/test/java/io/whitefox/core/services/ShareServiceTest.java b/server/core/src/test/java/io/whitefox/core/services/ShareServiceTest.java index 1ebf4ec6a..ccbaadac1 100644 --- a/server/core/src/test/java/io/whitefox/core/services/ShareServiceTest.java +++ b/server/core/src/test/java/io/whitefox/core/services/ShareServiceTest.java @@ -228,7 +228,7 @@ public void addTableToSchema() { var tablesFromDeltaService = new DeltaSharesServiceImpl( storage, 100, - new DeltaShareTableLoader(), + new TableLoaderFactoryImpl(), new FileSignerFactoryImpl(new S3ClientFactoryImpl())) .listTablesOfShare("share1", Optional.empty(), Optional.empty()) .get() diff --git a/server/core/src/testFixtures/java/io/whitefox/DeltaTestUtils.java b/server/core/src/testFixtures/java/io/whitefox/DeltaTestUtils.java index cb60f3da7..5ba01dbf9 100644 --- a/server/core/src/testFixtures/java/io/whitefox/DeltaTestUtils.java +++ b/server/core/src/testFixtures/java/io/whitefox/DeltaTestUtils.java @@ -1,11 +1,12 @@ package io.whitefox; -import io.whitefox.core.*; +import io.whitefox.core.InternalTable; +import io.whitefox.core.Principal; import java.nio.file.Path; import java.nio.file.Paths; import java.util.Optional; -public class DeltaTestUtils { +public class DeltaTestUtils extends TestUtils { private static final Path deltaTablesRoot = Paths.get(".") .toAbsolutePath() @@ -35,57 +36,6 @@ public static InternalTable deltaTable(String tableName) { mrFoxPrincipal, 0L, mrFoxPrincipal, - getProvider(getLocalStorage(mrFoxPrincipal), mrFoxPrincipal)); - } - - public static Provider getProvider(Storage storage, Principal 
principal) { - return new Provider( - "provider", storage, Optional.empty(), 0L, principal, 0L, principal, principal); - } - - public static Storage getStorage( - Principal principal, StorageType storageType, S3TestConfig s3TestConfig) { - return new Storage( - "storage", - Optional.empty(), - principal, - storageType, - Optional.empty(), - "uri", - 0L, - principal, - 0L, - principal, - new StorageProperties.S3Properties(new AwsCredentials.SimpleAwsCredentials( - s3TestConfig.accessKey(), s3TestConfig.secretKey(), s3TestConfig.region()))); - } - - public static Storage getLocalStorage(Principal principal) { - return getStorage( - principal, - StorageType.LOCAL, - new S3TestConfig("fakeRegion", "fakeAccessKey", "fakeSecretKey")); - } - - public static Storage getS3Storage(Principal principal, S3TestConfig s3TestConfig) { - return getStorage(principal, StorageType.S3, s3TestConfig); - } - - public static String s3DeltaTableUri(String s3TableName) { - return String.format("s3a://whitefox-s3-test-bucket/delta/samples/%s", s3TableName); - } - - public static InternalTable s3DeltaTable(String s3TableName, S3TestConfig s3TestConfig) { - var mrFoxPrincipal = new Principal("Mr. 
Fox"); - return new InternalTable( - s3TableName, - Optional.empty(), - new InternalTable.DeltaTableProperties(s3DeltaTableUri(s3TableName)), - Optional.of(0L), - 0L, - mrFoxPrincipal, - 0L, - mrFoxPrincipal, - getProvider(getS3Storage(mrFoxPrincipal, s3TestConfig), mrFoxPrincipal)); + getProvider(getLocalStorage(mrFoxPrincipal), mrFoxPrincipal, Optional.empty())); } } diff --git a/server/core/src/testFixtures/java/io/whitefox/IcebergTestUtils.java b/server/core/src/testFixtures/java/io/whitefox/IcebergTestUtils.java new file mode 100644 index 000000000..9ce6b2d22 --- /dev/null +++ b/server/core/src/testFixtures/java/io/whitefox/IcebergTestUtils.java @@ -0,0 +1,50 @@ +package io.whitefox; + +import io.whitefox.core.*; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Optional; + +public class IcebergTestUtils extends TestUtils { + + private static final Path icebergTablesRoot = Paths.get(".") + .toAbsolutePath() + .getParent() + .getParent() + .resolve("core") + .resolve("src/testFixtures/resources/iceberg/samples") + .toAbsolutePath(); + + public static String icebergTableUri(String tableName) { + return icebergTablesRoot + .resolve(tableName) + .toAbsolutePath() + .normalize() + .toUri() + .toString(); + } + + public static InternalTable icebergTableWithHadoopCatalog(String database, String tableName) { + var mrFoxPrincipal = new Principal("Mr. 
Fox"); + return new InternalTable( + tableName, + Optional.empty(), + new InternalTable.IcebergTableProperties(database, tableName), + Optional.of(0L), + 0L, + mrFoxPrincipal, + 0L, + mrFoxPrincipal, + getProvider( + getLocalStorage(mrFoxPrincipal), + mrFoxPrincipal, + Optional.of(getLocalHadoopMetastore(mrFoxPrincipal, icebergTablesRoot.toString())))); + } + + public static Metastore getLocalHadoopMetastore(Principal principal, String location) { + return getMetastore( + principal, + MetastoreType.HADOOP, + new MetastoreProperties.HadoopMetastoreProperties(location, MetastoreType.HADOOP)); + } +} diff --git a/server/core/src/testFixtures/java/io/whitefox/TestUtils.java b/server/core/src/testFixtures/java/io/whitefox/TestUtils.java new file mode 100644 index 000000000..7b10568c3 --- /dev/null +++ b/server/core/src/testFixtures/java/io/whitefox/TestUtils.java @@ -0,0 +1,51 @@ +package io.whitefox; + +import io.whitefox.core.*; +import java.util.Optional; + +public class TestUtils { + + public static Storage getStorage( + Principal principal, StorageType storageType, S3TestConfig s3TestConfig) { + return new Storage( + "storage", + Optional.empty(), + principal, + storageType, + Optional.empty(), + "uri", + 0L, + principal, + 0L, + principal, + new StorageProperties.S3Properties(new AwsCredentials.SimpleAwsCredentials( + s3TestConfig.accessKey(), s3TestConfig.secretKey(), s3TestConfig.region()))); + } + + public static Provider getProvider( + Storage storage, Principal principal, Optional metastore) { + return new Provider("provider", storage, metastore, 0L, principal, 0L, principal, principal); + } + + public static Storage getLocalStorage(Principal principal) { + return getStorage( + principal, + StorageType.LOCAL, + new S3TestConfig("fakeRegion", "fakeAccessKey", "fakeSecretKey")); + } + + public static Metastore getMetastore( + Principal principal, MetastoreType metastoreType, MetastoreProperties metastoreProperties) { + return new Metastore( + "metastore", + 
Optional.empty(), + principal, + metastoreType, + metastoreProperties, + Optional.empty(), + 0L, + principal, + 0L, + principal); + } +} From f122b0e3bfafa36d6d5f998f2ae8b5595ef75231 Mon Sep 17 00:00:00 2001 From: Marco Scalzo Date: Mon, 22 Jan 2024 14:37:04 +0100 Subject: [PATCH 02/20] pushing a local iceberg table --- ...a-4145-aa64-38443b8051bc-00001.parquet.crc | Bin 0 -> 12 bytes ...a-4145-aa64-38443b8051bc-00001.parquet.crc | Bin 0 -> 12 bytes ...a-4145-aa64-38443b8051bc-00001.parquet.crc | Bin 0 -> 12 bytes ...a-4145-aa64-38443b8051bc-00001.parquet.crc | Bin 0 -> 12 bytes ...a-4145-aa64-38443b8051bc-00001.parquet.crc | Bin 0 -> 12 bytes ...-e31a-4145-aa64-38443b8051bc-00001.parquet | Bin 0 -> 418 bytes ...-e31a-4145-aa64-38443b8051bc-00001.parquet | Bin 0 -> 419 bytes ...-e31a-4145-aa64-38443b8051bc-00001.parquet | Bin 0 -> 419 bytes ...-e31a-4145-aa64-38443b8051bc-00001.parquet | Bin 0 -> 419 bytes ...-e31a-4145-aa64-38443b8051bc-00001.parquet | Bin 0 -> 419 bytes ...ac-b849-4013-9591-15dea11586ae-m0.avro.crc | Bin 0 -> 64 bytes ...ce3ac-b849-4013-9591-15dea11586ae.avro.crc | Bin 0 -> 44 bytes .../metadata/.v1.metadata.json.crc | Bin 0 -> 16 bytes .../metadata/.v2.metadata.json.crc | Bin 0 -> 28 bytes .../metadata/.version-hint.text.crc | Bin 0 -> 12 bytes ...4ce3ac-b849-4013-9591-15dea11586ae-m0.avro | Bin 0 -> 6710 bytes ...-be4ce3ac-b849-4013-9591-15dea11586ae.avro | Bin 0 -> 4310 bytes .../icebergtable1/metadata/v1.metadata.json | 39 ++++++++++ .../icebergtable1/metadata/v2.metadata.json | 70 ++++++++++++++++++ .../icebergtable1/metadata/version-hint.text | 1 + 20 files changed, 110 insertions(+) create mode 100644 server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/data/.00001-1-fd9c0ac2-e31a-4145-aa64-38443b8051bc-00001.parquet.crc create mode 100644 server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/data/.00003-3-fd9c0ac2-e31a-4145-aa64-38443b8051bc-00001.parquet.crc create mode 100644 
server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/data/.00004-4-fd9c0ac2-e31a-4145-aa64-38443b8051bc-00001.parquet.crc create mode 100644 server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/data/.00006-6-fd9c0ac2-e31a-4145-aa64-38443b8051bc-00001.parquet.crc create mode 100644 server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/data/.00007-7-fd9c0ac2-e31a-4145-aa64-38443b8051bc-00001.parquet.crc create mode 100644 server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/data/00001-1-fd9c0ac2-e31a-4145-aa64-38443b8051bc-00001.parquet create mode 100644 server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/data/00003-3-fd9c0ac2-e31a-4145-aa64-38443b8051bc-00001.parquet create mode 100644 server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/data/00004-4-fd9c0ac2-e31a-4145-aa64-38443b8051bc-00001.parquet create mode 100644 server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/data/00006-6-fd9c0ac2-e31a-4145-aa64-38443b8051bc-00001.parquet create mode 100644 server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/data/00007-7-fd9c0ac2-e31a-4145-aa64-38443b8051bc-00001.parquet create mode 100644 server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/metadata/.be4ce3ac-b849-4013-9591-15dea11586ae-m0.avro.crc create mode 100644 server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/metadata/.snap-3369848726892806393-1-be4ce3ac-b849-4013-9591-15dea11586ae.avro.crc create mode 100644 server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/metadata/.v1.metadata.json.crc create mode 100644 server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/metadata/.v2.metadata.json.crc create mode 100644 server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/metadata/.version-hint.text.crc create 
mode 100644 server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/metadata/be4ce3ac-b849-4013-9591-15dea11586ae-m0.avro create mode 100644 server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/metadata/snap-3369848726892806393-1-be4ce3ac-b849-4013-9591-15dea11586ae.avro create mode 100644 server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/metadata/v1.metadata.json create mode 100644 server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/metadata/v2.metadata.json create mode 100644 server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/metadata/version-hint.text diff --git a/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/data/.00001-1-fd9c0ac2-e31a-4145-aa64-38443b8051bc-00001.parquet.crc b/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/data/.00001-1-fd9c0ac2-e31a-4145-aa64-38443b8051bc-00001.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..dfa93a103e3b219919c3c52d8a509c5bafe0add7 GIT binary patch literal 12 TcmYc;N@ieSU}E^O^}H?s6-oo- literal 0 HcmV?d00001 diff --git a/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/data/.00003-3-fd9c0ac2-e31a-4145-aa64-38443b8051bc-00001.parquet.crc b/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/data/.00003-3-fd9c0ac2-e31a-4145-aa64-38443b8051bc-00001.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..5ff8cf51bbdd82a831d6a5ee4c5f6851c609aa73 GIT binary patch literal 12 TcmYc;N@ieSU}EU#X5Rz=5&;7T literal 0 HcmV?d00001 diff --git a/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/data/.00004-4-fd9c0ac2-e31a-4145-aa64-38443b8051bc-00001.parquet.crc b/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/data/.00004-4-fd9c0ac2-e31a-4145-aa64-38443b8051bc-00001.parquet.crc new file mode 100644 
index 0000000000000000000000000000000000000000..a426f32128a5bdc080db6f7c4bf5340f5175c23a GIT binary patch literal 12 TcmYc;N@ieSU}EUKCt(Bt60idB literal 0 HcmV?d00001 diff --git a/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/data/.00006-6-fd9c0ac2-e31a-4145-aa64-38443b8051bc-00001.parquet.crc b/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/data/.00006-6-fd9c0ac2-e31a-4145-aa64-38443b8051bc-00001.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..ed7497a294453f6985b34f2cf69b4af2e20d9407 GIT binary patch literal 12 TcmYc;N@ieSU}DJ6$UX`H5)T7H literal 0 HcmV?d00001 diff --git a/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/data/.00007-7-fd9c0ac2-e31a-4145-aa64-38443b8051bc-00001.parquet.crc b/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/data/.00007-7-fd9c0ac2-e31a-4145-aa64-38443b8051bc-00001.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..fe4c887ffc217f38c13167239e4732e992f1b87e GIT binary patch literal 12 TcmYc;N@ieSU}9MMeBTiO6$u1M literal 0 HcmV?d00001 diff --git a/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/data/00001-1-fd9c0ac2-e31a-4145-aa64-38443b8051bc-00001.parquet b/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/data/00001-1-fd9c0ac2-e31a-4145-aa64-38443b8051bc-00001.parquet new file mode 100644 index 0000000000000000000000000000000000000000..ee12c57428c24787423bddf99b8cbb733711015a GIT binary patch literal 418 zcmYjO%Sr<=6uk`d!USe91X?Og>xAi>B6K6-q7*?9q~OdX*1+^N4?#-5z^y;w z2l!`h1v3;|@9I2~b1unbbSnsuQ^Lvn_ve#MkYSi$fcn?P4?K)uKkN)K%NV=)2MJ;n zfY}t2u>oMXdwrAzVXO!OQm%r9w7f$=5wn?H!!{|iDqvQ~NcZLVaI( zP)(Ux(-Z;Fj}ocSdvXaT1=TPeSBhP%6CJ5gC9`H4y{LI$O*cGyTPZ%^X_n+s#$CR1 z8d~y!=kjN*Vwv*6L+ZZdO6E&Q;c! 
zC(f%%QoUR_qAC2QaOz>MW9i7SC3?Q_`m!B%{g#T_eczM4t{2HbiH=Y$uP0>~oHN@t MuXt4gn3&)B1v%7FGXMYp literal 0 HcmV?d00001 diff --git a/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/data/00003-3-fd9c0ac2-e31a-4145-aa64-38443b8051bc-00001.parquet b/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/data/00003-3-fd9c0ac2-e31a-4145-aa64-38443b8051bc-00001.parquet new file mode 100644 index 0000000000000000000000000000000000000000..4b8e5d511e4e0a2b02a5236247f0ef38642af863 GIT binary patch literal 419 zcmYLG%SyvQ6ulj)%tAz=Gh`r(FwjyVtr^nV2U6%p#8nW4f^?B&#u`XlJ4pm7-3tDI zf8sy*1AdO+gkm$RbI!f@oXeeHdM^l&Gs4OD$NSH*O^~6OV1V}5BBdAIEPhtY(3)>~{`R8ym}j{z z;+(sDU+E;0k3E+^B`TE}A3tt+sR);^!e!ZSn9H@=6p2=*V5XJ(@0G4s3%;B0z`A=0 z^`=mHXQ`c6rL$zcazt17UE#E&B1xqqqn;T0!W+qcH1K;W?vH#=4hLQ=LnW?+>Ul#c Rqws>+wz=YsxpZK@=MR=!QE~tP literal 0 HcmV?d00001 diff --git a/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/data/00004-4-fd9c0ac2-e31a-4145-aa64-38443b8051bc-00001.parquet b/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/data/00004-4-fd9c0ac2-e31a-4145-aa64-38443b8051bc-00001.parquet new file mode 100644 index 0000000000000000000000000000000000000000..e5aa335f69b77c53d1f67c4f959dccbf380f2c65 GIT binary patch literal 419 zcmYLG%SyvQ6uliP%tAz=Gh`r(FwjyVtr^nV2U6%p#8nW4f^?B&#u`XplSGixg+Jk+ z_yhipOFuwxLa~|EIp^Mc&gD)peGml5IpO5z`~3^s1R076255g?{laVtfZ*`h1!5LH zvI-K!C;+o5CV>H9xO;b!hf%5s0#a{+g%bIQfFfoyyJb@9tO=NvGC8G)PH51=HJ!sV-QS@s*|a;>&SqLnF_Y32TVrR&v#@8^54?l+;{ z7Ao&7wezZUmaJEf=nB6poOV8(5#I;a8ZzyFH QUNYM@SG=tO1m=7G09Er*>;M1& literal 0 HcmV?d00001 diff --git a/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/data/00006-6-fd9c0ac2-e31a-4145-aa64-38443b8051bc-00001.parquet b/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/data/00006-6-fd9c0ac2-e31a-4145-aa64-38443b8051bc-00001.parquet new file mode 100644 index 
0000000000000000000000000000000000000000..4e5bb48c0d5c12695b5f2d327c40b43a4edb1147 GIT binary patch literal 419 zcmYLGO-sW-5S^|S_8>)}yJR7Uu+UN=ty$9AHb|it5l=x73erQ8U27oy%7-AO|3ds3 z9t3}zC&3NHW>4?EnR#!R^`{Sl068a|e7%4EG)#gt#RLO%zOQ~^HU&6=!($hS8TiO5 zND!j{%%qt38i3~Z-Dw_#i6RI{y#)q}Ihy;|`7d=JL`CP=r1 z%Dc&Vg2iDaO}wNQO$BxM*} PGSk#oysZKF`g{HW7kyF3 literal 0 HcmV?d00001 diff --git a/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/data/00007-7-fd9c0ac2-e31a-4145-aa64-38443b8051bc-00001.parquet b/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/data/00007-7-fd9c0ac2-e31a-4145-aa64-38443b8051bc-00001.parquet new file mode 100644 index 0000000000000000000000000000000000000000..f8b104fa2e3cc484425a57d8592d95966ab79c74 GIT binary patch literal 419 zcmYLG%SyvQ6uliP%tCxXXUISnVW6c#S~H}z52VnIh^rt71?eKmj5Uz9c9IBE3aZ{ef*r-1R076255g@{=#euP>20v=ZIPO zz$!=(qX5jNm;?rZ;qL8G9!9Am2uQsN7E0s;0*aW;>=w33oizcoQYOa~(FqM&__}he z5QX+3vQkZ$S5M4aX~FigGRZFb&|v?QhL$N;%BuCt@)PcZ!e0Ed6w%U z&biC?l};l0*mL<)qEeah@#B`4ig5WVT$cTYxm>GFk!WQKW?H%bUg>(Z;Jf(_th<*` zZwi%nmfCq$I!o3oM|6eX6;3-Ul2ke}>WQH*ypilj1HY%@{>b;_aNxx- Q3NM&#n=9Vb00Q$pf0BSvbpQYW literal 0 HcmV?d00001 diff --git a/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/metadata/.be4ce3ac-b849-4013-9591-15dea11586ae-m0.avro.crc b/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/metadata/.be4ce3ac-b849-4013-9591-15dea11586ae-m0.avro.crc new file mode 100644 index 0000000000000000000000000000000000000000..00434c94db8309c8049909a65f8352eed8107ec3 GIT binary patch literal 64 zcmV-G0KflZa$^7h00IDB-S;4|ucIjNNL*s^kP`jg#IoMZRR?)+7#olBf$v|f0TdH3 WLh45~s8mIrbHF2%KSjk6YhQnl*Bl%G literal 0 HcmV?d00001 diff --git a/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/metadata/.snap-3369848726892806393-1-be4ce3ac-b849-4013-9591-15dea11586ae.avro.crc 
b/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/metadata/.snap-3369848726892806393-1-be4ce3ac-b849-4013-9591-15dea11586ae.avro.crc new file mode 100644 index 0000000000000000000000000000000000000000..b9f6f6a05b9ef18fc3b8e1a4f5877ff4dc6a41a4 GIT binary patch literal 44 zcmYc;N@ieSU}7ja5>(lm9K;mLKe6I;^+dJ>9#c8|JlBS>ct<8 literal 0 HcmV?d00001 diff --git a/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/metadata/.v1.metadata.json.crc b/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/metadata/.v1.metadata.json.crc new file mode 100644 index 0000000000000000000000000000000000000000..1c4e65b5c2e4f473006ec12a0a027d3b84bf7fbe GIT binary patch literal 16 XcmYc;N@ieSU}6xs@HH^_&CdS-CU6E= literal 0 HcmV?d00001 diff --git a/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/metadata/.v2.metadata.json.crc b/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/metadata/.v2.metadata.json.crc new file mode 100644 index 0000000000000000000000000000000000000000..7a1b739a9d6b9a58b0efba63effb0b5f8e3cc595 GIT binary patch literal 28 kcmYc;N@ieSU}8{akl5t*kV8WM_RmRc?{1jpm*?>Y0Ck-T!~g&Q literal 0 HcmV?d00001 diff --git a/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/metadata/.version-hint.text.crc b/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/metadata/.version-hint.text.crc new file mode 100644 index 0000000000000000000000000000000000000000..20031206a3b58c7bd0e0b0cf48215fa64e60ea8c GIT binary patch literal 12 TcmYc;N@ieSU}BKEx{ntC5%2=_ literal 0 HcmV?d00001 diff --git a/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/metadata/be4ce3ac-b849-4013-9591-15dea11586ae-m0.avro b/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/metadata/be4ce3ac-b849-4013-9591-15dea11586ae-m0.avro new file mode 100644 index 
0000000000000000000000000000000000000000..dff22548304528b76d438b8678a5ae3f5c1c6143 GIT binary patch literal 6710 zcmb_ge~1)S99J)rF*h7b3fNCB2PW2Gd<17&UJS7tv5M4v(C((o!5Di z5{6J=5lUi`VHRa5Wnledij`R>2o?52@yA~h*FO@HoqgY%o%i0(?W}uy2g||Dy!ZX_ z`TqROveb*Cr%ZD74xH7FkM5Sdt{j#UlIuBn-IHRH|E9)BONn?)${5&8yHa9Jw?v-E zG0D=hjhC+%`!6B z^^(}~oGy4SC@mauJ~VG&d5Ip5NrSa%@KwX~euVS7nxqJLqHWaUj0u29q3 zV^SJtw7ls_3G!T&B#V;3J#6VXY2~vi>@f9}OH&1D8N4K- zyD_9J1phSC@VbI$fmRwOSrx6PfaEl$z?^jn+xAS6H zY2F7~TWLCzsQQOpWcW%5av{-!GuYUSlZKT{bwR|L?|xwg3Rc>lX3}&)E*vM~gX3b* z#AMKIGoQ7RkVBqSNhE~tL32o1!z32pmA26Y4hRAd5RQHir@R6f@fv7_8E*A%5H04Y*bIOL$;6{0gu7CWh>StAaX z+6Z8&O;u!@I;aTBWt-YmC0JzClX```LKHRD9{ma~n{2AsJWP8Nc9JQ;99F2H;S8gv zDfJ}J8mfR!sgEwy5}jFTs0uskFQWdI1xcwBtjXtcBMTDgW@9@$L3A5r8`|Yx;Mc+6x>58(@vF*iz;>_HwSB6(#~XD z>{Xt1+4i;A3 z&cc#qxG<0D&_XSXh?X+8iD~TG4xr_vA(u<>4q^lgP#c_uB@@nqj zlve3>8taq6I#cuTOn;TF>BNo;3;7sDV*6qZ*%u)WW8yOn9Yb$hUtnZs=CSpuGV%v> zFigiH2q5v6lz1^^EGB0BYDP2NFnKIoR=LVS#Hk_ywM+nI1%v5VMN$+dMPyw3s>pr^ zgG!?(aA=hq#pzdRVIxn+SXkmboD`Y@5sR=RM4!qLGp>PuEub10C96!w;tDAm*T=)e zvLC9ew6-!~8UcF7ZQ>amB4(w{3i_wdf6Eg!u7oF)pXDKnR*9-e%Eq-cvMLCXH7>uX zlo`Z?4#gyxMgBOD8|XjeBNC;kbkr7JfewH2CITeewIqj&2Y#etyqJee|LScxer5Ikfh)bA{rA@618dJ; rynZyf%{tNd?v_7s?*wDwz?SnD`;YdHJuu_ewu8NY|91PXFe?88({|Jj literal 0 HcmV?d00001 diff --git a/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/metadata/snap-3369848726892806393-1-be4ce3ac-b849-4013-9591-15dea11586ae.avro b/server/core/src/testFixtures/resources/iceberg/samples/test_db/icebergtable1/metadata/snap-3369848726892806393-1-be4ce3ac-b849-4013-9591-15dea11586ae.avro new file mode 100644 index 0000000000000000000000000000000000000000..358d3da7507cd92264d7c34e4196fd4ebb7ae82a GIT binary patch literal 4310 zcmbW4UuYaf9LHl2tbd4YsYPjwlO=5wle(MBUG5HpifKz}z?zG`)Nq;Gox9t1|D3zC zIg=XE7p;8@f>IUrAygC#K9s&#lnPZUDvA&Y1);tuKKi0UKxg*P%`{9yfA9mF`GT{e9XC=JcKvGe1kZL=cG$vWlq`ysQ=~d+7a#c4F zd0f?zp}A7=)Y-Nb4T$SxMHtA%j&9Y+Z_QRooLO6iShp?k+}v@{)NvggEdig-qK@mD 
zRLsiq>R5Yh16egOx?u~k=WPrPKobP6zJh>mfyy#QZj^2qSP=p@po4XqPuXoCwM@t? zaoxf!>k|1B9G?N6i!^|3&~$X%n(u@VR<$jR{P)8uh{2u?{&ay|1-OntRN5ktr!|P- z7*M7)WT3Xpl_>$?q8B}6sik0~*r))hQN_+NA)smzT-sBT zvfA+e{)7;f%?zvW=BI(}YAj=vEL^>%5#D_aje{u>1VstlAn<2gue9h|&H? z8wu%9CX*}VC-YMixqM+FFXu9a%pl>I5RWe*$0c*Ee_|*SQg;Rgy zA$F3+-h&IE=O>^j?G!fuQ#diP zO7#OAF~Jb*VhK(l1tJdfWM{wt$`k`|PtZl!UOWPaWf?5$Id=ys&{)ArJ-7u<%`s}S zPw0l+rEg$RIm4hyj-eZ5sS5|ed6O-?gaMxd6;DgMm*526JT^IZ24Uk=^emji+Z9dH zonfN!nn+Rj#R~cpo7O3nik-zar@?lh@ZPgZ^&FlRNpJ_8z`GsQsAoV_o}~CPR|O_J zbh`1nRH5NA^-2ik_y!=GOFoZ_CHp;@gy9^$5q^Z^bAY)Au*@H zq`+%s^rHu}@Y7k@kP_^ZcG&OKXNylZ_yuVwGO^NS}3 z7iX>veSToDF!{p+*KWNw|Hs-J>K!*Oym@Wq&@N+m?%d5=dq*$!eez1>y^X~iyGI}V z_N^0Jzr4G#_20Q~)?QzeuH3)pp8MW*E`K+F Date: Mon, 22 Jan 2024 15:45:53 +0100 Subject: [PATCH 03/20] fix simpleIcebergTest --- .../core/services/IcebergCatalogServiceTest.java | 6 ++++-- .../java/io/whitefox/IcebergTestUtils.java | 11 +---------- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java b/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java index 3f8d596a5..69f8e2335 100644 --- a/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java +++ b/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java @@ -2,6 +2,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; +import io.whitefox.IcebergTestUtils; import java.io.IOException; import java.util.Map; import org.apache.hadoop.conf.Configuration; @@ -53,8 +54,9 @@ public class IcebergCatalogServiceTest { void simpleIcebergTest() throws IOException { Map catalogProps = Map.of( "warehouse", - "/Users/marco/agilelab_wa/lake-sharing/server/core/src/testFixtures/resources/iceberg/samples/", - "io.manifest.cache-enabled", "true"); + IcebergTestUtils.icebergTablesRoot.toString(), + "io.manifest.cache-enabled", + "true"); try (HadoopCatalog hadoopCatalog = new 
HadoopCatalog()) { // Initialize your catalog hadoopCatalog.setConf(new Configuration()); diff --git a/server/core/src/testFixtures/java/io/whitefox/IcebergTestUtils.java b/server/core/src/testFixtures/java/io/whitefox/IcebergTestUtils.java index 9ce6b2d22..89ecf14ff 100644 --- a/server/core/src/testFixtures/java/io/whitefox/IcebergTestUtils.java +++ b/server/core/src/testFixtures/java/io/whitefox/IcebergTestUtils.java @@ -7,7 +7,7 @@ public class IcebergTestUtils extends TestUtils { - private static final Path icebergTablesRoot = Paths.get(".") + public static final Path icebergTablesRoot = Paths.get(".") .toAbsolutePath() .getParent() .getParent() @@ -15,15 +15,6 @@ public class IcebergTestUtils extends TestUtils { .resolve("src/testFixtures/resources/iceberg/samples") .toAbsolutePath(); - public static String icebergTableUri(String tableName) { - return icebergTablesRoot - .resolve(tableName) - .toAbsolutePath() - .normalize() - .toUri() - .toString(); - } - public static InternalTable icebergTableWithHadoopCatalog(String database, String tableName) { var mrFoxPrincipal = new Principal("Mr. 
Fox"); return new InternalTable( From c201b06524568d02c561a58b1e503c48ab13bd08 Mon Sep 17 00:00:00 2001 From: Marco Scalzo Date: Mon, 22 Jan 2024 15:50:09 +0100 Subject: [PATCH 04/20] disable IcebergCatalogServiceTest on windows --- .../io/whitefox/core/services/IcebergCatalogServiceTest.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java b/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java index 69f8e2335..17baddfa5 100644 --- a/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java +++ b/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java @@ -1,6 +1,7 @@ package io.whitefox.core.services; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.condition.OS.WINDOWS; import io.whitefox.IcebergTestUtils; import java.io.IOException; @@ -10,7 +11,9 @@ import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.hadoop.HadoopCatalog; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.DisabledOnOs; +@DisabledOnOs(WINDOWS) public class IcebergCatalogServiceTest { /** From 2b2dbf69ca8d00b6f216e2f9450dfe8ca1abed23 Mon Sep 17 00:00:00 2001 From: Marco Scalzo Date: Mon, 22 Jan 2024 16:02:19 +0100 Subject: [PATCH 05/20] improve doc of simpleIcebergTest --- .../services/IcebergCatalogServiceTest.java | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java b/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java index 17baddfa5..cb0264e29 100644 --- a/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java +++ b/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java @@ -23,7 +23,7 @@ public class IcebergCatalogServiceTest { * 
spark-shell --packages org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.4.2,org.apache.iceberg:iceberg-aws-bundle:1.4.2 \ * --conf spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkCatalog \ * --conf spark.sql.catalog.spark_catalog.type=hadoop \ - * --conf spark.sql.catalog.spark_catalog.warehouse=/Users/marco/agilelab_wa/lake-sharing/server/core/src/testFixtures/resources/iceberg/samples/ \ + * --conf spark.sql.catalog.spark_catalog.warehouse=/Volumes/repos/oss/whitefox/server/core/src/testFixtures/resources/iceberg/samples/ \ * --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions * Take care that the version of iceberg must be compatible with the version of spark and scala you are using * (i.e. I'm using iceberg 3.5 on scala 2.12 because my local spark-shell is version 3.5.0 using scala 2.12 @@ -38,7 +38,7 @@ public class IcebergCatalogServiceTest { * import org.apache.hadoop.conf.Configuration; * * val catalogProps = Map.of( - * "warehouse", "/Users/marco/agilelab_wa/lake-sharing/server/core/src/testFixtures/resources/iceberg/samples/", + * "warehouse", "/Volumes/repos/oss/whitefox/server/core/src/testFixtures/resources/iceberg/samples/", * "io.manifest.cache-enabled", "true"); * val catalog = new HadoopCatalog(); * catalog.setConf(new Configuration()); @@ -55,20 +55,17 @@ public class IcebergCatalogServiceTest { */ @Test void simpleIcebergTest() throws IOException { - Map catalogProps = Map.of( - "warehouse", - IcebergTestUtils.icebergTablesRoot.toString(), - "io.manifest.cache-enabled", - "true"); try (HadoopCatalog hadoopCatalog = new HadoopCatalog()) { - // Initialize your catalog + // Initialize catalog hadoopCatalog.setConf(new Configuration()); - hadoopCatalog.initialize("hadoop", catalogProps); + hadoopCatalog.initialize( + "test_hadoop_catalog", + Map.of("warehouse", IcebergTestUtils.icebergTablesRoot.toString())); TableIdentifier tableIdentifier = TableIdentifier.of("test_db", "icebergtable1"); // 
Load the Iceberg table Table table = hadoopCatalog.loadTable(tableIdentifier); - assertEquals("hadoop.test_db.icebergtable1", table.name()); + assertEquals("test_hadoop_catalog.test_db.icebergtable1", table.name()); } } } From 95971685229337f5411f6ad738e3edf9bad4e140 Mon Sep 17 00:00:00 2001 From: Marco Scalzo Date: Mon, 22 Jan 2024 16:05:19 +0100 Subject: [PATCH 06/20] improve doc of simpleIcebergTest --- .../io/whitefox/core/services/IcebergCatalogServiceTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java b/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java index cb0264e29..dfac19dd1 100644 --- a/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java +++ b/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java @@ -28,7 +28,7 @@ public class IcebergCatalogServiceTest { * Take care that the version of iceberg must be compatible with the version of spark and scala you are using * (i.e. 
I'm using iceberg 3.5 on scala 2.12 because my local spark-shell is version 3.5.0 using scala 2.12 * - * First, uou need to create an iceberg table on your local hadoop catalog + * First, uou need to create an iceberg table with your local hadoop catalog * {{{ * import org.apache.iceberg.catalog.Namespace; * import org.apache.iceberg.Schema; From 7ee67f67b0cc09c81a947160b86b148ef0492473 Mon Sep 17 00:00:00 2001 From: Marco Scalzo Date: Mon, 22 Jan 2024 16:13:57 +0100 Subject: [PATCH 07/20] improve doc of simpleIcebergTest --- .../services/IcebergCatalogServiceTest.java | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java b/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java index dfac19dd1..42615df08 100644 --- a/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java +++ b/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java @@ -30,23 +30,22 @@ public class IcebergCatalogServiceTest { * * First, uou need to create an iceberg table with your local hadoop catalog * {{{ - * import org.apache.iceberg.catalog.Namespace; - * import org.apache.iceberg.Schema; - * import org.apache.iceberg.catalog.TableIdentifier; - * import org.apache.iceberg.hadoop.HadoopCatalog; - * import java.util.Map; - * import org.apache.hadoop.conf.Configuration; + * import org.apache.iceberg.catalog.Namespace + * import org.apache.iceberg.Schema + * import org.apache.iceberg.catalog.TableIdentifier + * import org.apache.iceberg.hadoop.HadoopCatalog + * import java.util.Map + * import org.apache.hadoop.conf.Configuration * - * val catalogProps = Map.of( - * "warehouse", "/Volumes/repos/oss/whitefox/server/core/src/testFixtures/resources/iceberg/samples/", - * "io.manifest.cache-enabled", "true"); - * val catalog = new HadoopCatalog(); - * catalog.setConf(new Configuration()); - * 
catalog.initialize("hadoop", catalogProps); - * catalog.createNamespace(Namespace.of("test_db")); + * val catalog = new HadoopCatalog() + * catalog.setConf(new Configuration()) + * catalog.initialize("test_hadoop_catalog", + * Map.of("warehouse", "/Volumes/repos/oss/whitefox/server/core/src/testFixtures/resources/iceberg/samples/")) + * catalog.createNamespace(Namespace.of("test_db")) * val schema = new Schema(org.apache.iceberg.types.Types.NestedField.required(1, "id", org.apache.iceberg.types.Types.LongType.get())) * catalog.createTable(TableIdentifier.of("test_db", "icebergtable1"), schema) * }}} + * * Then, you can append data on your iceberg table * {{{ * val data = spark.range(0, 5) From f86c349c1093ae6ab3717ea1fc7b557dfe2813ab Mon Sep 17 00:00:00 2001 From: Marco Scalzo Date: Mon, 22 Jan 2024 19:39:03 +0100 Subject: [PATCH 08/20] use CatalogProperties.Warehouse --- .../main/java/io/whitefox/core/services/IcebergSharedTable.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/core/src/main/java/io/whitefox/core/services/IcebergSharedTable.java b/server/core/src/main/java/io/whitefox/core/services/IcebergSharedTable.java index 3e28d6c6f..82957e0c7 100644 --- a/server/core/src/main/java/io/whitefox/core/services/IcebergSharedTable.java +++ b/server/core/src/main/java/io/whitefox/core/services/IcebergSharedTable.java @@ -82,7 +82,7 @@ private static BaseMetastoreCatalog newCatalog( catalog.initialize( metastore.name(), Map.of( - "warehouse", + CatalogProperties.WAREHOUSE_LOCATION, ((MetastoreProperties.HadoopMetastoreProperties) metastore.properties()) .location())); return catalog; From 030b47c3700baa9b960587ecae425f66312a5e0e Mon Sep 17 00:00:00 2001 From: Marco Scalzo Date: Mon, 22 Jan 2024 19:55:40 +0100 Subject: [PATCH 09/20] create an aws glue table and write a test using GlueCatalog --- server/core/build.gradle.kts | 4 +- .../services/IcebergCatalogServiceTest.java | 64 ++++++++++++++++++- .../resources/application.properties | 6 
+- 3 files changed, 69 insertions(+), 5 deletions(-) diff --git a/server/core/build.gradle.kts b/server/core/build.gradle.kts index d1a194bc1..1f1db69a0 100644 --- a/server/core/build.gradle.kts +++ b/server/core/build.gradle.kts @@ -34,7 +34,9 @@ dependencies { implementation("org.apache.iceberg:iceberg-api:1.4.3") implementation("org.apache.iceberg:iceberg-core:1.4.3") implementation("org.apache.iceberg:iceberg-aws:1.4.3") - compileOnly("software.amazon.awssdk:glue:2.22.10") + implementation("software.amazon.awssdk:glue:2.22.10") + implementation("software.amazon.awssdk:sts:2.22.10") + implementation("software.amazon.awssdk:s3:2.22.10") //AWS compileOnly(String.format("com.amazonaws:aws-java-sdk-bom:%s", awsSdkVersion)) diff --git a/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java b/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java index 42615df08..7d49121c5 100644 --- a/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java +++ b/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java @@ -8,6 +8,7 @@ import java.util.Map; import org.apache.hadoop.conf.Configuration; import org.apache.iceberg.Table; +import org.apache.iceberg.aws.glue.GlueCatalog; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.hadoop.HadoopCatalog; import org.junit.jupiter.api.Test; @@ -18,17 +19,18 @@ public class IcebergCatalogServiceTest { /** * This is some sample code that you need to run in your spark shell to generate new iceberg tables for new test cases: - * To run the spark-shell with delta support execute: + * To run the spark-shell with iceberg support execute: * {{{ * spark-shell --packages org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.4.2,org.apache.iceberg:iceberg-aws-bundle:1.4.2 \ * --conf spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkCatalog \ * --conf spark.sql.catalog.spark_catalog.type=hadoop \ * --conf 
spark.sql.catalog.spark_catalog.warehouse=/Volumes/repos/oss/whitefox/server/core/src/testFixtures/resources/iceberg/samples/ \ * --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions + * * Take care that the version of iceberg must be compatible with the version of spark and scala you are using * (i.e. I'm using iceberg 3.5 on scala 2.12 because my local spark-shell is version 3.5.0 using scala 2.12 * - * First, uou need to create an iceberg table with your local hadoop catalog + * First, you need to create an iceberg table with your local hadoop catalog * {{{ * import org.apache.iceberg.catalog.Namespace * import org.apache.iceberg.Schema @@ -53,7 +55,7 @@ public class IcebergCatalogServiceTest { * }}} */ @Test - void simpleIcebergTest() throws IOException { + void localIcebergTableWithHadoopCatalogTest() throws IOException { try (HadoopCatalog hadoopCatalog = new HadoopCatalog()) { // Initialize catalog hadoopCatalog.setConf(new Configuration()); @@ -67,4 +69,60 @@ void simpleIcebergTest() throws IOException { assertEquals("test_hadoop_catalog.test_db.icebergtable1", table.name()); } } + + /** + * This is some sample code that you need to run in your spark shell to generate new iceberg tables on s3 for new test cases: + * First, you need to retrieve the aws credentials of your aws account, then: + * + * {{{ + * export AWS_ACCESS_KEY_ID='************' + * export AWS_SECRET_ACCESS_KEY='*******************************' + * export AWS_SESSION_TOKEN='************************************************************' + * spark-shell --packages org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.4.2,org.apache.iceberg:iceberg-aws-bundle:1.4.2 \ + * --conf spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkCatalog \ + * --conf spark.sql.catalog.spark_catalog.warehouse=specify-your-s3-bucket \ + * --conf spark.sql.catalog.spark_catalog.catalog-impl=org.apache.iceberg.aws.glue.GlueCatalog \ + * --conf 
spark.sql.catalog.spark_catalog.io-impl=org.apache.iceberg.aws.s3.S3FileIO \ + * --conf spark.sql.catalog.spark_catalog.glue.skip-name-validation=false + * }}} + * + * Take care that the version of iceberg must be compatible with the version of spark and scala you are using + * (i.e. I'm using iceberg 3.5 on scala 2.12 because my local spark-shell is version 3.5.0 using scala 2.12 + * + * Now, you need to create an iceberg table with your aws glue catalog + * {{{ + * import org.apache.iceberg.catalog.Namespace + * import org.apache.iceberg.Schema + * import org.apache.iceberg.catalog.TableIdentifier + * import org.apache.iceberg.hadoop.HadoopCatalog + * import java.util.Map + * import org.apache.hadoop.conf.Configuration + * + * val catalog = new GlueCatalog() + * catalog.setConf(new Configuration()) + * catalog.initialize("test_glue_catalog", Map.of( + * "warehouse", "specify-your-s3-bucket")) + * catalog.createNamespace(Namespace.of("test_glue_db")) + * val schema = new Schema(org.apache.iceberg.types.Types.NestedField.required(1, "id", org.apache.iceberg.types.Types.LongType.get())) + * catalog.createTable(TableIdentifier.of("test_glue_db", "icebergtable1"), schema) + * + * Then, you can append data on your iceberg table + * {{{ + * val data = spark.range(0, 5) + * data.writeTo("test_glue_db.icebergtable1").append() + * }}} + */ + @Test + void s3IcebergTableWithAwsGlueCatalogTest() throws IOException { + try (GlueCatalog glueCatalog = new GlueCatalog()) { + // Initialize catalog + glueCatalog.setConf(new Configuration()); + glueCatalog.initialize("test_glue_catalog", Map.of()); + TableIdentifier tableIdentifier = TableIdentifier.of("test_glue_db", "icebergtable1"); + + // Load the Iceberg table + Table table = glueCatalog.loadTable(tableIdentifier); + assertEquals("test_glue_catalog.test_glue_db.icebergtable1", table.name()); + } + } } diff --git a/server/core/src/testFixtures/resources/application.properties 
b/server/core/src/testFixtures/resources/application.properties index a02839f43..f76146c04 100644 --- a/server/core/src/testFixtures/resources/application.properties +++ b/server/core/src/testFixtures/resources/application.properties @@ -3,4 +3,8 @@ io.delta.sharing.api.server.defaultMaxResults=10 whitefox.provider.aws.test.region=${WHITEFOX_TEST_AWS_REGION} whitefox.provider.aws.test.accessKey=${WHITEFOX_TEST_AWS_ACCESS_KEY_ID} -whitefox.provider.aws.test.secretKey=${WHITEFOX_TEST_AWS_SECRET_ACCESS_KEY} \ No newline at end of file +whitefox.provider.aws.test.secretKey=${WHITEFOX_TEST_AWS_SECRET_ACCESS_KEY} + +personal.provider.aws.test.region=${TEST_AWS_REGION} +personal.provider.aws.test.accessKey=${TEST_AWS_ACCESS_KEY_ID} +personal.provider.aws.test.secretKey=${TEST_AWS_SECRET_ACCESS_KEY} \ No newline at end of file From 5b4abdb3e4fed70979181903ed3cfe06936cf532 Mon Sep 17 00:00:00 2001 From: Marco Scalzo Date: Mon, 22 Jan 2024 20:07:48 +0100 Subject: [PATCH 10/20] naive attempt --- .github/workflows/compile.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/compile.yaml b/.github/workflows/compile.yaml index 927b54ad9..673a0f313 100644 --- a/.github/workflows/compile.yaml +++ b/.github/workflows/compile.yaml @@ -28,6 +28,7 @@ jobs: WHITEFOX_TEST_AWS_REGION: ${{ vars.WHITEFOX_AWS_REGION }} WHITEFOX_TEST_AWS_ACCESS_KEY_ID: ${{ secrets.WHITEFOX_AWS_ACCESS_KEY_ID }} WHITEFOX_TEST_AWS_SECRET_ACCESS_KEY: ${{ secrets.WHITEFOX_AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ vars.WHITEFOX_AWS_REGION }} run: | if [ "$RUNNER_OS" == "Windows" ]; then export HADOOP_HOME="$(pwd)/.github/workflows/hadoop3-win-binaries" @@ -41,6 +42,7 @@ jobs: WHITEFOX_TEST_AWS_REGION: ${{ vars.WHITEFOX_AWS_REGION }} WHITEFOX_TEST_AWS_ACCESS_KEY_ID: ${{ secrets.WHITEFOX_AWS_ACCESS_KEY_ID }} WHITEFOX_TEST_AWS_SECRET_ACCESS_KEY: ${{ secrets.WHITEFOX_AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ vars.WHITEFOX_AWS_REGION }} run: | WHITEFOX_SERVER_AUTHENTICATION_ENABLED=TRUE \ 
WHITEFOX_SERVER_AUTHENTICATION_BEARERTOKEN=token \ From 8edfe3abdf199613a2fc338827a1caa45adb2c5f Mon Sep 17 00:00:00 2001 From: Marco Scalzo Date: Mon, 22 Jan 2024 20:15:49 +0100 Subject: [PATCH 11/20] add s3TestConfig --- .github/workflows/compile.yaml | 2 -- .../services/IcebergCatalogServiceTest.java | 19 ++++++++++++++++++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/.github/workflows/compile.yaml b/.github/workflows/compile.yaml index 673a0f313..927b54ad9 100644 --- a/.github/workflows/compile.yaml +++ b/.github/workflows/compile.yaml @@ -28,7 +28,6 @@ jobs: WHITEFOX_TEST_AWS_REGION: ${{ vars.WHITEFOX_AWS_REGION }} WHITEFOX_TEST_AWS_ACCESS_KEY_ID: ${{ secrets.WHITEFOX_AWS_ACCESS_KEY_ID }} WHITEFOX_TEST_AWS_SECRET_ACCESS_KEY: ${{ secrets.WHITEFOX_AWS_SECRET_ACCESS_KEY }} - AWS_REGION: ${{ vars.WHITEFOX_AWS_REGION }} run: | if [ "$RUNNER_OS" == "Windows" ]; then export HADOOP_HOME="$(pwd)/.github/workflows/hadoop3-win-binaries" @@ -42,7 +41,6 @@ jobs: WHITEFOX_TEST_AWS_REGION: ${{ vars.WHITEFOX_AWS_REGION }} WHITEFOX_TEST_AWS_ACCESS_KEY_ID: ${{ secrets.WHITEFOX_AWS_ACCESS_KEY_ID }} WHITEFOX_TEST_AWS_SECRET_ACCESS_KEY: ${{ secrets.WHITEFOX_AWS_SECRET_ACCESS_KEY }} - AWS_REGION: ${{ vars.WHITEFOX_AWS_REGION }} run: | WHITEFOX_SERVER_AUTHENTICATION_ENABLED=TRUE \ WHITEFOX_SERVER_AUTHENTICATION_BEARERTOKEN=token \ diff --git a/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java b/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java index 7d49121c5..5f1f71cc4 100644 --- a/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java +++ b/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java @@ -6,6 +6,8 @@ import io.whitefox.IcebergTestUtils; import java.io.IOException; import java.util.Map; + +import io.whitefox.S3TestConfig; import org.apache.hadoop.conf.Configuration; import org.apache.iceberg.Table; import 
org.apache.iceberg.aws.glue.GlueCatalog; @@ -17,6 +19,9 @@ @DisabledOnOs(WINDOWS) public class IcebergCatalogServiceTest { + private final S3TestConfig s3TestConfig = S3TestConfig.loadFromEnv(); + + /** * This is some sample code that you need to run in your spark shell to generate new iceberg tables for new test cases: * To run the spark-shell with iceberg support execute: @@ -116,7 +121,7 @@ void localIcebergTableWithHadoopCatalogTest() throws IOException { void s3IcebergTableWithAwsGlueCatalogTest() throws IOException { try (GlueCatalog glueCatalog = new GlueCatalog()) { // Initialize catalog - glueCatalog.setConf(new Configuration()); + glueCatalog.setConf(buildConfig()); glueCatalog.initialize("test_glue_catalog", Map.of()); TableIdentifier tableIdentifier = TableIdentifier.of("test_glue_db", "icebergtable1"); @@ -125,4 +130,16 @@ void s3IcebergTableWithAwsGlueCatalogTest() throws IOException { assertEquals("test_glue_catalog.test_glue_db.icebergtable1", table.name()); } } + + private Configuration buildConfig(){ + var configuration = new Configuration(); + configuration.set( + "fs.s3a.aws.credentials.provider", + "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider"); + configuration.set("fs.s3a.access.key", s3TestConfig.accessKey()); + configuration.set("fs.s3a.secret.key", s3TestConfig.secretKey()); + configuration.set("fs.s3a.endpoint.region", s3TestConfig.region()); + return configuration; + + } } From 048bfcf3264f2f0d5baac3e146d2a6874de0d11f Mon Sep 17 00:00:00 2001 From: Marco Scalzo Date: Mon, 22 Jan 2024 20:19:06 +0100 Subject: [PATCH 12/20] spotlessApply --- .../core/services/IcebergCatalogServiceTest.java | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java b/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java index 5f1f71cc4..bf05929ec 100644 --- 
a/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java +++ b/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java @@ -4,10 +4,9 @@ import static org.junit.jupiter.api.condition.OS.WINDOWS; import io.whitefox.IcebergTestUtils; +import io.whitefox.S3TestConfig; import java.io.IOException; import java.util.Map; - -import io.whitefox.S3TestConfig; import org.apache.hadoop.conf.Configuration; import org.apache.iceberg.Table; import org.apache.iceberg.aws.glue.GlueCatalog; @@ -19,8 +18,7 @@ @DisabledOnOs(WINDOWS) public class IcebergCatalogServiceTest { - private final S3TestConfig s3TestConfig = S3TestConfig.loadFromEnv(); - + private final S3TestConfig s3TestConfig = S3TestConfig.loadFromEnv(); /** * This is some sample code that you need to run in your spark shell to generate new iceberg tables for new test cases: @@ -131,15 +129,13 @@ void s3IcebergTableWithAwsGlueCatalogTest() throws IOException { } } - private Configuration buildConfig(){ + private Configuration buildConfig() { var configuration = new Configuration(); configuration.set( - "fs.s3a.aws.credentials.provider", - "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider"); + "fs.s3a.aws.credentials.provider", "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider"); configuration.set("fs.s3a.access.key", s3TestConfig.accessKey()); configuration.set("fs.s3a.secret.key", s3TestConfig.secretKey()); configuration.set("fs.s3a.endpoint.region", s3TestConfig.region()); return configuration; - } } From 1075551f23bab090b764a55c4557efed40b9a09f Mon Sep 17 00:00:00 2001 From: Marco Scalzo Date: Mon, 22 Jan 2024 20:35:44 +0100 Subject: [PATCH 13/20] try adding envs --- .github/workflows/compile.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/compile.yaml b/.github/workflows/compile.yaml index 927b54ad9..f36c4eec8 100644 --- a/.github/workflows/compile.yaml +++ b/.github/workflows/compile.yaml @@ -25,6 +25,9 @@ jobs: uses: 
gradle/gradle-build-action@v2 - name: Execute Gradle build env: + AWS_REGION: ${{ vars.WHITEFOX_AWS_REGION }} + AWS_ACCESS_KEY_ID: ${{ secrets.WHITEFOX_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.WHITEFOX_AWS_SECRET_ACCESS_KEY }} WHITEFOX_TEST_AWS_REGION: ${{ vars.WHITEFOX_AWS_REGION }} WHITEFOX_TEST_AWS_ACCESS_KEY_ID: ${{ secrets.WHITEFOX_AWS_ACCESS_KEY_ID }} WHITEFOX_TEST_AWS_SECRET_ACCESS_KEY: ${{ secrets.WHITEFOX_AWS_SECRET_ACCESS_KEY }} @@ -38,6 +41,9 @@ jobs: - name: Run integration test shell: bash env: + AWS_REGION: ${{ vars.WHITEFOX_AWS_REGION }} + AWS_ACCESS_KEY_ID: ${{ secrets.WHITEFOX_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.WHITEFOX_AWS_SECRET_ACCESS_KEY }} WHITEFOX_TEST_AWS_REGION: ${{ vars.WHITEFOX_AWS_REGION }} WHITEFOX_TEST_AWS_ACCESS_KEY_ID: ${{ secrets.WHITEFOX_AWS_ACCESS_KEY_ID }} WHITEFOX_TEST_AWS_SECRET_ACCESS_KEY: ${{ secrets.WHITEFOX_AWS_SECRET_ACCESS_KEY }} From d93deef56c4fb7eca7519202381f782afea53077 Mon Sep 17 00:00:00 2001 From: Marco Scalzo Date: Tue, 23 Jan 2024 14:19:42 +0100 Subject: [PATCH 14/20] remove env vars --- .github/workflows/compile.yaml | 6 ------ .../services/IcebergCatalogServiceTest.java | 20 +++++++------------ 2 files changed, 7 insertions(+), 19 deletions(-) diff --git a/.github/workflows/compile.yaml b/.github/workflows/compile.yaml index f36c4eec8..927b54ad9 100644 --- a/.github/workflows/compile.yaml +++ b/.github/workflows/compile.yaml @@ -25,9 +25,6 @@ jobs: uses: gradle/gradle-build-action@v2 - name: Execute Gradle build env: - AWS_REGION: ${{ vars.WHITEFOX_AWS_REGION }} - AWS_ACCESS_KEY_ID: ${{ secrets.WHITEFOX_AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.WHITEFOX_AWS_SECRET_ACCESS_KEY }} WHITEFOX_TEST_AWS_REGION: ${{ vars.WHITEFOX_AWS_REGION }} WHITEFOX_TEST_AWS_ACCESS_KEY_ID: ${{ secrets.WHITEFOX_AWS_ACCESS_KEY_ID }} WHITEFOX_TEST_AWS_SECRET_ACCESS_KEY: ${{ secrets.WHITEFOX_AWS_SECRET_ACCESS_KEY }} @@ -41,9 +38,6 @@ jobs: - name: Run integration test shell: bash 
env: - AWS_REGION: ${{ vars.WHITEFOX_AWS_REGION }} - AWS_ACCESS_KEY_ID: ${{ secrets.WHITEFOX_AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.WHITEFOX_AWS_SECRET_ACCESS_KEY }} WHITEFOX_TEST_AWS_REGION: ${{ vars.WHITEFOX_AWS_REGION }} WHITEFOX_TEST_AWS_ACCESS_KEY_ID: ${{ secrets.WHITEFOX_AWS_ACCESS_KEY_ID }} WHITEFOX_TEST_AWS_SECRET_ACCESS_KEY: ${{ secrets.WHITEFOX_AWS_SECRET_ACCESS_KEY }} diff --git a/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java b/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java index bf05929ec..503e9c26d 100644 --- a/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java +++ b/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java @@ -1,5 +1,6 @@ package io.whitefox.core.services; +import static org.apache.iceberg.aws.AwsProperties.*; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.condition.OS.WINDOWS; @@ -119,23 +120,16 @@ void localIcebergTableWithHadoopCatalogTest() throws IOException { void s3IcebergTableWithAwsGlueCatalogTest() throws IOException { try (GlueCatalog glueCatalog = new GlueCatalog()) { // Initialize catalog - glueCatalog.setConf(buildConfig()); - glueCatalog.initialize("test_glue_catalog", Map.of()); + glueCatalog.setConf(new Configuration()); + glueCatalog.initialize( + "test_glue_catalog", + Map.of( + "glue.id", "583975731810", + "client.region", "eu-west-1")); TableIdentifier tableIdentifier = TableIdentifier.of("test_glue_db", "icebergtable1"); - // Load the Iceberg table Table table = glueCatalog.loadTable(tableIdentifier); assertEquals("test_glue_catalog.test_glue_db.icebergtable1", table.name()); } } - - private Configuration buildConfig() { - var configuration = new Configuration(); - configuration.set( - "fs.s3a.aws.credentials.provider", "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider"); - configuration.set("fs.s3a.access.key", 
s3TestConfig.accessKey()); - configuration.set("fs.s3a.secret.key", s3TestConfig.secretKey()); - configuration.set("fs.s3a.endpoint.region", s3TestConfig.region()); - return configuration; - } } From ba548507c52370a60578382be761ef2c5102a961 Mon Sep 17 00:00:00 2001 From: Marco Scalzo Date: Tue, 23 Jan 2024 18:23:53 +0100 Subject: [PATCH 15/20] override StaticCredentialsProvider and implement tests against aws glue --- .github/workflows/compile.yaml | 2 + .../api/deltasharing/SampleTables.java | 39 +----- .../aws/utils/StaticCredentialsProvider.java | 35 ++++++ .../core/services/IcebergSharedTable.java | 115 ++++++++++++------ .../services/IcebergCatalogServiceTest.java | 18 ++- .../core/services/IcebergTableLoaderTest.java | 22 +++- .../java/io/whitefox/AwsGlueTestConfig.java | 33 +++++ .../java/io/whitefox/DeltaTestUtils.java | 21 +++- .../java/io/whitefox/IcebergTestUtils.java | 34 ++++++ .../java/io/whitefox/TestUtils.java | 16 +++ .../resources/application.properties | 5 +- 11 files changed, 257 insertions(+), 83 deletions(-) create mode 100644 server/core/src/main/java/io/whitefox/core/aws/utils/StaticCredentialsProvider.java create mode 100644 server/core/src/testFixtures/java/io/whitefox/AwsGlueTestConfig.java diff --git a/.github/workflows/compile.yaml b/.github/workflows/compile.yaml index 927b54ad9..9d94dc8cb 100644 --- a/.github/workflows/compile.yaml +++ b/.github/workflows/compile.yaml @@ -28,6 +28,7 @@ jobs: WHITEFOX_TEST_AWS_REGION: ${{ vars.WHITEFOX_AWS_REGION }} WHITEFOX_TEST_AWS_ACCESS_KEY_ID: ${{ secrets.WHITEFOX_AWS_ACCESS_KEY_ID }} WHITEFOX_TEST_AWS_SECRET_ACCESS_KEY: ${{ secrets.WHITEFOX_AWS_SECRET_ACCESS_KEY }} + WHITEFOX_TEST_GLUE_CATALOG_ID: ${{ secrets.WHITEFOX_GLUE_CATALOG_ID }} run: | if [ "$RUNNER_OS" == "Windows" ]; then export HADOOP_HOME="$(pwd)/.github/workflows/hadoop3-win-binaries" @@ -41,6 +42,7 @@ jobs: WHITEFOX_TEST_AWS_REGION: ${{ vars.WHITEFOX_AWS_REGION }} WHITEFOX_TEST_AWS_ACCESS_KEY_ID: ${{ 
secrets.WHITEFOX_AWS_ACCESS_KEY_ID }} WHITEFOX_TEST_AWS_SECRET_ACCESS_KEY: ${{ secrets.WHITEFOX_AWS_SECRET_ACCESS_KEY }} + WHITEFOX_TEST_GLUE_CATALOG_ID: ${{ secrets.WHITEFOX_GLUE_CATALOG_ID }} run: | WHITEFOX_SERVER_AUTHENTICATION_ENABLED=TRUE \ WHITEFOX_SERVER_AUTHENTICATION_BEARERTOKEN=token \ diff --git a/server/app/src/test/java/io/whitefox/api/deltasharing/SampleTables.java b/server/app/src/test/java/io/whitefox/api/deltasharing/SampleTables.java index 4a12bc5ed..777cc5523 100644 --- a/server/app/src/test/java/io/whitefox/api/deltasharing/SampleTables.java +++ b/server/app/src/test/java/io/whitefox/api/deltasharing/SampleTables.java @@ -6,12 +6,13 @@ import io.whitefox.api.deltasharing.model.FileObjectFileWithoutPresignedUrl; import io.whitefox.api.deltasharing.model.FileObjectWithoutPresignedUrl; import io.whitefox.api.deltasharing.model.v1.generated.*; -import io.whitefox.core.*; +import io.whitefox.core.InternalTable; +import io.whitefox.core.Principal; +import io.whitefox.core.SharedTable; import io.whitefox.persistence.StorageManager; import io.whitefox.persistence.memory.InMemoryStorageManager; import java.util.List; import java.util.Map; -import java.util.Optional; import java.util.Set; public class SampleTables { @@ -34,40 +35,6 @@ public static InternalTable s3DeltaTableWithHistory1(S3TestConfig s3TestConfig) public static final InternalTable deltaTableWithHistory1 = deltaTable("delta-table-with-history"); - public static InternalTable s3DeltaTable(String s3TableName, S3TestConfig s3TestConfig) { - var mrFoxPrincipal = new Principal("Mr. 
Fox"); - return new InternalTable( - s3TableName, - Optional.empty(), - new InternalTable.DeltaTableProperties(s3DeltaTableUri(s3TableName)), - Optional.of(0L), - 0L, - mrFoxPrincipal, - 0L, - mrFoxPrincipal, - getProvider(getS3Storage(mrFoxPrincipal, s3TestConfig), mrFoxPrincipal, Optional.empty())); - } - - public static Storage getS3Storage(Principal principal, S3TestConfig s3TestConfig) { - return new Storage( - "storage", - Optional.empty(), - principal, - StorageType.S3, - Optional.empty(), - "uri", - 0L, - principal, - 0L, - principal, - new StorageProperties.S3Properties(new AwsCredentials.SimpleAwsCredentials( - s3TestConfig.accessKey(), s3TestConfig.secretKey(), s3TestConfig.region()))); - } - - public static String s3DeltaTableUri(String s3TableName) { - return String.format("s3a://whitefox-s3-test-bucket/delta/samples/%s", s3TableName); - } - public static StorageManager createStorageManager() { return new InMemoryStorageManager(List.of(new io.whitefox.core.Share( "name", diff --git a/server/core/src/main/java/io/whitefox/core/aws/utils/StaticCredentialsProvider.java b/server/core/src/main/java/io/whitefox/core/aws/utils/StaticCredentialsProvider.java new file mode 100644 index 000000000..fa4bae221 --- /dev/null +++ b/server/core/src/main/java/io/whitefox/core/aws/utils/StaticCredentialsProvider.java @@ -0,0 +1,35 @@ +package io.whitefox.core.aws.utils; + +import java.util.Map; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; + +public class StaticCredentialsProvider implements AwsCredentialsProvider { + + private final AwsCredentials credentials; + + public static AwsCredentialsProvider create(Map properties) { + return software.amazon.awssdk.auth.credentials.StaticCredentialsProvider.create( + retrieveCredentials(properties)); + } + + private static AwsCredentials retrieveCredentials(Map 
properties) { + if (!properties.containsKey("accessKeyId")) { + throw new RuntimeException("accessKeyId not found"); + } else if (!properties.containsKey("secretAccessKey")) { + throw new RuntimeException("secretAccessKey not found"); + } + return AwsBasicCredentials.create( + properties.get("accessKeyId"), properties.get("secretAccessKey")); + } + + private StaticCredentialsProvider(Map properties) { + this.credentials = retrieveCredentials(properties); + } + + @Override + public AwsCredentials resolveCredentials() { + return credentials; + } +} diff --git a/server/core/src/main/java/io/whitefox/core/services/IcebergSharedTable.java b/server/core/src/main/java/io/whitefox/core/services/IcebergSharedTable.java index 82957e0c7..8afcb368d 100644 --- a/server/core/src/main/java/io/whitefox/core/services/IcebergSharedTable.java +++ b/server/core/src/main/java/io/whitefox/core/services/IcebergSharedTable.java @@ -1,17 +1,20 @@ package io.whitefox.core.services; import io.whitefox.core.*; +import io.whitefox.core.aws.utils.StaticCredentialsProvider; import java.io.IOException; import java.util.HashMap; import java.util.Map; import java.util.Optional; import org.apache.commons.lang3.NotImplementedException; -import org.apache.hadoop.conf.Configuration; import org.apache.iceberg.BaseMetastoreCatalog; import org.apache.iceberg.CatalogProperties; import org.apache.iceberg.Table; +import org.apache.iceberg.aws.AwsClientProperties; +import org.apache.iceberg.aws.AwsProperties; import org.apache.iceberg.aws.glue.GlueCatalog; import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.NoSuchTableException; import org.apache.iceberg.hadoop.HadoopCatalog; public class IcebergSharedTable implements AbstractSharedTable { @@ -31,18 +34,13 @@ public static IcebergSharedTable of( if (tableDetails.internalTable().properties() instanceof InternalTable.IcebergTableProperties) { var metastore = getMetastore(tableDetails.internalTable()); - var catalog = 
newCatalog( - metastore, - hadoopConfigBuilder, - tableDetails.internalTable().provider().storage()); var tableId = getTableIdentifier(tableDetails.internalTable()); - try { - return new IcebergSharedTable(catalog.loadTable(tableId), tableSchemaConverter); - } catch (Exception e) { - throw new IllegalArgumentException(String.format( - "Cannot found iceberg table [%s] under namespace [%s]", - tableId.name(), tableId.namespace())); - } + var table = loadTable( + metastore, + tableDetails.internalTable().provider().storage(), + tableId, + hadoopConfigBuilder); + return new IcebergSharedTable(table, tableSchemaConverter); } else { throw new IllegalArgumentException( String.format("%s is not an iceberg table", tableDetails.name())); @@ -64,36 +62,68 @@ private static Metastore getMetastore(InternalTable internalTable) { String.format("missing metastore for the iceberg table: [%s]", internalTable.name()))); } - private static BaseMetastoreCatalog newCatalog( - Metastore metastore, HadoopConfigBuilder hadoopConfigBuilder, Storage storage) { + private static Table loadTable( + Metastore metastore, + Storage storage, + TableIdentifier tableIdentifier, + HadoopConfigBuilder hadoopConfigBuilder) { if (metastore.type() == MetastoreType.GLUE) { - try (var catalog = new GlueCatalog()) { - Configuration conf = hadoopConfigBuilder.buildConfig(storage); - catalog.setConf(conf); - catalog.initialize(metastore.name(), setGlueProperties()); - return catalog; - } catch (IOException e) { - throw new RuntimeException("Unexpected exception when initializing the glue catalog", e); - } + return loadTableWithGlueCatalog(metastore, storage, tableIdentifier, hadoopConfigBuilder); } else if (metastore.type() == MetastoreType.HADOOP) { - try (var catalog = new HadoopCatalog()) { - Configuration conf = hadoopConfigBuilder.buildConfig(storage); - catalog.setConf(conf); - catalog.initialize( - metastore.name(), - Map.of( - CatalogProperties.WAREHOUSE_LOCATION, - 
((MetastoreProperties.HadoopMetastoreProperties) metastore.properties()) - .location())); - return catalog; - } catch (IOException e) { - throw new RuntimeException("Unexpected exception when initializing the hadoop catalog", e); - } + return loadTableWithHadoopCatalog(metastore, storage, tableIdentifier, hadoopConfigBuilder); } else { throw new RuntimeException(String.format("Unknown metastore type: [%s]", metastore.type())); } } + private static Table loadTableWithGlueCatalog( + Metastore metastore, + Storage storage, + TableIdentifier tableIdentifier, + HadoopConfigBuilder hadoopConfigBuilder) { + try (var catalog = new GlueCatalog()) { + catalog.setConf(hadoopConfigBuilder.buildConfig(storage)); + catalog.initialize( + metastore.name(), + setGlueProperties((MetastoreProperties.GlueMetastoreProperties) metastore.properties())); + return loadTable(catalog, tableIdentifier); + } catch (IOException e) { + throw new RuntimeException("Unexpected error when closing the Glue catalog", e); + } + } + + private static Table loadTableWithHadoopCatalog( + Metastore metastore, + Storage storage, + TableIdentifier tableIdentifier, + HadoopConfigBuilder hadoopConfigBuilder) { + try (var catalog = new HadoopCatalog()) { + catalog.setConf(hadoopConfigBuilder.buildConfig(storage)); + catalog.initialize( + metastore.name(), + Map.of( + CatalogProperties.WAREHOUSE_LOCATION, + ((MetastoreProperties.HadoopMetastoreProperties) metastore.properties()).location())); + return loadTable(catalog, tableIdentifier); + } catch (IOException e) { + throw new RuntimeException("Unexpected error when closing the Hadoop catalog", e); + } + } + + private static Table loadTable(BaseMetastoreCatalog catalog, TableIdentifier tableIdentifier) { + try { + return catalog.loadTable(tableIdentifier); + } catch (NoSuchTableException e) { + throw new IllegalArgumentException(String.format( + "Cannot found iceberg table [%s] under namespace [%s]", + tableIdentifier.name(), tableIdentifier.namespace())); + } 
catch (Throwable e) { + throw new RuntimeException(String.format( + "Unexpected exception when loading the iceberg table [%s] under namespace [%s]", + tableIdentifier.name(), tableIdentifier.namespace())); + } + } + public static IcebergSharedTable of(SharedTable sharedTable) { return of(sharedTable, new TableSchemaConverter(), new HadoopConfigBuilder()); } @@ -112,10 +142,23 @@ public ReadTableResultToBeSigned queryTable(ReadTableRequest readTableRequest) { throw new NotImplementedException(); } - private static Map setGlueProperties() { + private static Map setGlueProperties( + MetastoreProperties.GlueMetastoreProperties glueMetastoreProperties) { + AwsCredentials.SimpleAwsCredentials credentials = + (AwsCredentials.SimpleAwsCredentials) glueMetastoreProperties.credentials(); Map properties = new HashMap<>(); properties.put(CatalogProperties.CATALOG_IMPL, "org.apache.iceberg.aws.glue.GlueCatalog"); properties.put(CatalogProperties.FILE_IO_IMPL, "org.apache.iceberg.aws.s3.S3FileIO"); + properties.put(AwsProperties.GLUE_CATALOG_ID, glueMetastoreProperties.catalogId()); + properties.put(AwsClientProperties.CLIENT_REGION, credentials.region()); + properties.put( + AwsClientProperties.CLIENT_CREDENTIALS_PROVIDER, StaticCredentialsProvider.class.getName()); + properties.put( + String.format("%s.%s", AwsClientProperties.CLIENT_CREDENTIALS_PROVIDER, "accessKeyId"), + credentials.awsAccessKeyId()); + properties.put( + String.format("%s.%s", AwsClientProperties.CLIENT_CREDENTIALS_PROVIDER, "secretAccessKey"), + credentials.awsSecretAccessKey()); return properties; } } diff --git a/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java b/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java index 503e9c26d..5077ecfcc 100644 --- a/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java +++ b/server/core/src/test/java/io/whitefox/core/services/IcebergCatalogServiceTest.java @@ -1,15 +1,16 @@ 
package io.whitefox.core.services; -import static org.apache.iceberg.aws.AwsProperties.*; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.condition.OS.WINDOWS; import io.whitefox.IcebergTestUtils; import io.whitefox.S3TestConfig; +import io.whitefox.core.aws.utils.StaticCredentialsProvider; import java.io.IOException; import java.util.Map; import org.apache.hadoop.conf.Configuration; import org.apache.iceberg.Table; +import org.apache.iceberg.aws.AwsClientProperties; import org.apache.iceberg.aws.glue.GlueCatalog; import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.hadoop.HadoopCatalog; @@ -81,7 +82,6 @@ void localIcebergTableWithHadoopCatalogTest() throws IOException { * {{{ * export AWS_ACCESS_KEY_ID='************' * export AWS_SECRET_ACCESS_KEY='*******************************' - * export AWS_SESSION_TOKEN='************************************************************' * spark-shell --packages org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.4.2,org.apache.iceberg:iceberg-aws-bundle:1.4.2 \ * --conf spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkCatalog \ * --conf spark.sql.catalog.spark_catalog.warehouse=specify-your-s3-bucket \ @@ -119,13 +119,23 @@ void localIcebergTableWithHadoopCatalogTest() throws IOException { @Test void s3IcebergTableWithAwsGlueCatalogTest() throws IOException { try (GlueCatalog glueCatalog = new GlueCatalog()) { + // Initialize catalog glueCatalog.setConf(new Configuration()); glueCatalog.initialize( "test_glue_catalog", Map.of( - "glue.id", "583975731810", - "client.region", "eu-west-1")); + AwsClientProperties.CLIENT_REGION, + s3TestConfig.region(), + AwsClientProperties.CLIENT_CREDENTIALS_PROVIDER, + StaticCredentialsProvider.class.getName(), + String.format( + "%s.%s", AwsClientProperties.CLIENT_CREDENTIALS_PROVIDER, "accessKeyId"), + s3TestConfig.accessKey(), + String.format( + "%s.%s", AwsClientProperties.CLIENT_CREDENTIALS_PROVIDER, 
"secretAccessKey"), + s3TestConfig.secretKey())); + TableIdentifier tableIdentifier = TableIdentifier.of("test_glue_db", "icebergtable1"); // Load the Iceberg table Table table = glueCatalog.loadTable(tableIdentifier); diff --git a/server/core/src/test/java/io/whitefox/core/services/IcebergTableLoaderTest.java b/server/core/src/test/java/io/whitefox/core/services/IcebergTableLoaderTest.java index 3b281b708..49a7ff915 100644 --- a/server/core/src/test/java/io/whitefox/core/services/IcebergTableLoaderTest.java +++ b/server/core/src/test/java/io/whitefox/core/services/IcebergTableLoaderTest.java @@ -1,9 +1,12 @@ package io.whitefox.core.services; import static io.whitefox.IcebergTestUtils.icebergTableWithHadoopCatalog; +import static io.whitefox.IcebergTestUtils.s3IcebergTableWithAwsGlueCatalog; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertThrows; +import io.whitefox.AwsGlueTestConfig; +import io.whitefox.S3TestConfig; import io.whitefox.core.SharedTable; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.condition.DisabledOnOs; @@ -12,10 +15,12 @@ public class IcebergTableLoaderTest { private final IcebergTableLoader icebergTableLoader = new IcebergTableLoader(); + private final S3TestConfig s3TestConfig = S3TestConfig.loadFromEnv(); + private final AwsGlueTestConfig awsGlueTestConfig = AwsGlueTestConfig.loadFromEnv(); @Test @DisabledOnOs(OS.WINDOWS) - public void loadTable() { + public void loadLocalIcebergTableWithHadoopCatalog() { SharedTable sharedTable = new SharedTable( "icebergtable1", "schema", @@ -27,6 +32,21 @@ public void loadTable() { // assertEquals(0, icebergSharedTable.getTableVersion(Optional.empty()).get()); } + @Test + @DisabledOnOs(OS.WINDOWS) + public void loadS3IcebergTableWithAwsGlueCatalog() { + SharedTable sharedTable = new SharedTable( + "icebergtable1", + "s3schema", + "s3share", + s3IcebergTableWithAwsGlueCatalog( + s3TestConfig, awsGlueTestConfig, 
"test_glue_db", "icebergtable1")); + assertDoesNotThrow(() -> icebergTableLoader.loadTable(sharedTable)); + // TODO: add asserts here when IcebergSharedTable.getTableVersion has been implemented + // assertTrue(icebergSharedTable.getTableVersion(Optional.empty()).isPresent()); + // assertEquals(0, icebergSharedTable.getTableVersion(Optional.empty()).get()); + } + @Test public void loadUnknownTable() { SharedTable sharedTable = new SharedTable( diff --git a/server/core/src/testFixtures/java/io/whitefox/AwsGlueTestConfig.java b/server/core/src/testFixtures/java/io/whitefox/AwsGlueTestConfig.java new file mode 100644 index 000000000..88be1b98d --- /dev/null +++ b/server/core/src/testFixtures/java/io/whitefox/AwsGlueTestConfig.java @@ -0,0 +1,33 @@ +package io.whitefox; + +import jakarta.inject.Inject; +import jakarta.inject.Singleton; +import java.util.Optional; +import org.eclipse.microprofile.config.inject.ConfigProperty; + +@Singleton +public class AwsGlueTestConfig { + + public static AwsGlueTestConfig loadFromEnv() { + return new AwsGlueTestConfig( + Optional.ofNullable(System.getenv().get("WHITEFOX_TEST_GLUE_CATALOG_ID"))); + } + + @Inject + public AwsGlueTestConfig( + @ConfigProperty(name = "whitefox.provider.aws.test.glue.catalog.id") + Optional catalogId) { + this.catalogId = catalogId; + } + + private final Optional catalogId; + + public AwsGlueTestConfig(String catalogId) { + this.catalogId = Optional.of(catalogId); + } + + public String catalogId() { + return catalogId.orElseThrow(() -> new RuntimeException( + "Missing glue catalog configuration, " + "are you providing the aws glue catalog id?")); + } +} diff --git a/server/core/src/testFixtures/java/io/whitefox/DeltaTestUtils.java b/server/core/src/testFixtures/java/io/whitefox/DeltaTestUtils.java index 5ba01dbf9..2b0c29869 100644 --- a/server/core/src/testFixtures/java/io/whitefox/DeltaTestUtils.java +++ b/server/core/src/testFixtures/java/io/whitefox/DeltaTestUtils.java @@ -1,7 +1,6 @@ package 
io.whitefox; -import io.whitefox.core.InternalTable; -import io.whitefox.core.Principal; +import io.whitefox.core.*; import java.nio.file.Path; import java.nio.file.Paths; import java.util.Optional; @@ -38,4 +37,22 @@ public static InternalTable deltaTable(String tableName) { mrFoxPrincipal, getProvider(getLocalStorage(mrFoxPrincipal), mrFoxPrincipal, Optional.empty())); } + + public static InternalTable s3DeltaTable(String s3TableName, S3TestConfig s3TestConfig) { + var mrFoxPrincipal = new Principal("Mr. Fox"); + return new InternalTable( + s3TableName, + Optional.empty(), + new InternalTable.DeltaTableProperties(s3DeltaTableUri(s3TableName)), + Optional.of(0L), + 0L, + mrFoxPrincipal, + 0L, + mrFoxPrincipal, + getProvider(getS3Storage(mrFoxPrincipal, s3TestConfig), mrFoxPrincipal, Optional.empty())); + } + + public static String s3DeltaTableUri(String s3TableName) { + return String.format("s3a://whitefox-s3-test-bucket/delta/samples/%s", s3TableName); + } } diff --git a/server/core/src/testFixtures/java/io/whitefox/IcebergTestUtils.java b/server/core/src/testFixtures/java/io/whitefox/IcebergTestUtils.java index 89ecf14ff..c369262f6 100644 --- a/server/core/src/testFixtures/java/io/whitefox/IcebergTestUtils.java +++ b/server/core/src/testFixtures/java/io/whitefox/IcebergTestUtils.java @@ -38,4 +38,38 @@ public static Metastore getLocalHadoopMetastore(Principal principal, String loca MetastoreType.HADOOP, new MetastoreProperties.HadoopMetastoreProperties(location, MetastoreType.HADOOP)); } + + public static InternalTable s3IcebergTableWithAwsGlueCatalog( + S3TestConfig s3TestConfig, + AwsGlueTestConfig awsGlueTestConfig, + String database, + String tableName) { + var mrFoxPrincipal = new Principal("Mr. 
Fox"); + return new InternalTable( + tableName, + Optional.empty(), + new InternalTable.IcebergTableProperties(database, tableName), + Optional.of(0L), + 0L, + mrFoxPrincipal, + 0L, + mrFoxPrincipal, + getProvider( + getS3Storage(mrFoxPrincipal, s3TestConfig), + mrFoxPrincipal, + Optional.of( + getAwsGlueMetastore(mrFoxPrincipal, awsGlueTestConfig.catalogId(), s3TestConfig)))); + } + + public static Metastore getAwsGlueMetastore( + Principal principal, String catalogId, S3TestConfig s3TestConfig) { + return getMetastore( + principal, + MetastoreType.GLUE, + new MetastoreProperties.GlueMetastoreProperties( + catalogId, + new AwsCredentials.SimpleAwsCredentials( + s3TestConfig.accessKey(), s3TestConfig.secretKey(), s3TestConfig.region()), + MetastoreType.GLUE)); + } } diff --git a/server/core/src/testFixtures/java/io/whitefox/TestUtils.java b/server/core/src/testFixtures/java/io/whitefox/TestUtils.java index 7b10568c3..1603f382e 100644 --- a/server/core/src/testFixtures/java/io/whitefox/TestUtils.java +++ b/server/core/src/testFixtures/java/io/whitefox/TestUtils.java @@ -48,4 +48,20 @@ public static Metastore getMetastore( 0L, principal); } + + public static Storage getS3Storage(Principal principal, S3TestConfig s3TestConfig) { + return new Storage( + "storage", + Optional.empty(), + principal, + StorageType.S3, + Optional.empty(), + "uri", + 0L, + principal, + 0L, + principal, + new StorageProperties.S3Properties(new AwsCredentials.SimpleAwsCredentials( + s3TestConfig.accessKey(), s3TestConfig.secretKey(), s3TestConfig.region()))); + } } diff --git a/server/core/src/testFixtures/resources/application.properties b/server/core/src/testFixtures/resources/application.properties index f76146c04..b289337ff 100644 --- a/server/core/src/testFixtures/resources/application.properties +++ b/server/core/src/testFixtures/resources/application.properties @@ -4,7 +4,4 @@ io.delta.sharing.api.server.defaultMaxResults=10 whitefox.provider.aws.test.region=${WHITEFOX_TEST_AWS_REGION} 
whitefox.provider.aws.test.accessKey=${WHITEFOX_TEST_AWS_ACCESS_KEY_ID} whitefox.provider.aws.test.secretKey=${WHITEFOX_TEST_AWS_SECRET_ACCESS_KEY} - -personal.provider.aws.test.region=${TEST_AWS_REGION} -personal.provider.aws.test.accessKey=${TEST_AWS_ACCESS_KEY_ID} -personal.provider.aws.test.secretKey=${TEST_AWS_SECRET_ACCESS_KEY} \ No newline at end of file +whitefox.provider.aws.test.glue.catalog.id=${WHITEFOX_TEST_GLUE_CATALOG_ID} \ No newline at end of file From b2c6b92c0afbf2578035a1fb52ff3be32a695d24 Mon Sep 17 00:00:00 2001 From: Marco Scalzo Date: Tue, 23 Jan 2024 21:02:13 +0100 Subject: [PATCH 16/20] factor out code from IcebergSharedTable: introduce IcebergCatalogHandler and AwsGlueConfigBuilder --- .../core/services/AwsGlueConfigBuilder.java | 33 +++++ .../core/services/DeltaShareTableLoader.java | 2 - .../core/services/IcebergCatalogHandler.java | 72 ++++++++++ .../core/services/IcebergSharedTable.java | 136 +----------------- .../core/services/IcebergTableLoader.java | 52 ++++++- .../core/services/TableLoaderFactoryImpl.java | 3 +- .../core/services/IcebergTableLoaderTest.java | 3 +- 7 files changed, 164 insertions(+), 137 deletions(-) create mode 100644 server/core/src/main/java/io/whitefox/core/services/AwsGlueConfigBuilder.java create mode 100644 server/core/src/main/java/io/whitefox/core/services/IcebergCatalogHandler.java diff --git a/server/core/src/main/java/io/whitefox/core/services/AwsGlueConfigBuilder.java b/server/core/src/main/java/io/whitefox/core/services/AwsGlueConfigBuilder.java new file mode 100644 index 000000000..46de4037f --- /dev/null +++ b/server/core/src/main/java/io/whitefox/core/services/AwsGlueConfigBuilder.java @@ -0,0 +1,33 @@ +package io.whitefox.core.services; + +import io.whitefox.core.AwsCredentials; +import io.whitefox.core.MetastoreProperties; +import io.whitefox.core.aws.utils.StaticCredentialsProvider; +import java.util.HashMap; +import java.util.Map; +import org.apache.iceberg.CatalogProperties; +import 
org.apache.iceberg.aws.AwsClientProperties; +import org.apache.iceberg.aws.AwsProperties; + +public class AwsGlueConfigBuilder { + + public Map buildConfig( + MetastoreProperties.GlueMetastoreProperties glueMetastoreProperties) { + AwsCredentials.SimpleAwsCredentials credentials = + (AwsCredentials.SimpleAwsCredentials) glueMetastoreProperties.credentials(); + Map config = new HashMap<>(); + config.put(CatalogProperties.CATALOG_IMPL, "org.apache.iceberg.aws.glue.GlueCatalog"); + config.put(CatalogProperties.FILE_IO_IMPL, "org.apache.iceberg.aws.s3.S3FileIO"); + config.put(AwsProperties.GLUE_CATALOG_ID, glueMetastoreProperties.catalogId()); + config.put(AwsClientProperties.CLIENT_REGION, credentials.region()); + config.put( + AwsClientProperties.CLIENT_CREDENTIALS_PROVIDER, StaticCredentialsProvider.class.getName()); + config.put( + String.format("%s.%s", AwsClientProperties.CLIENT_CREDENTIALS_PROVIDER, "accessKeyId"), + credentials.awsAccessKeyId()); + config.put( + String.format("%s.%s", AwsClientProperties.CLIENT_CREDENTIALS_PROVIDER, "secretAccessKey"), + credentials.awsSecretAccessKey()); + return config; + } +} diff --git a/server/core/src/main/java/io/whitefox/core/services/DeltaShareTableLoader.java b/server/core/src/main/java/io/whitefox/core/services/DeltaShareTableLoader.java index baccc1a26..67d80791f 100644 --- a/server/core/src/main/java/io/whitefox/core/services/DeltaShareTableLoader.java +++ b/server/core/src/main/java/io/whitefox/core/services/DeltaShareTableLoader.java @@ -1,9 +1,7 @@ package io.whitefox.core.services; import io.whitefox.core.SharedTable; -import jakarta.enterprise.context.ApplicationScoped; -@ApplicationScoped public class DeltaShareTableLoader implements TableLoader { @Override diff --git a/server/core/src/main/java/io/whitefox/core/services/IcebergCatalogHandler.java b/server/core/src/main/java/io/whitefox/core/services/IcebergCatalogHandler.java new file mode 100644 index 000000000..4ded3166f --- /dev/null +++ 
b/server/core/src/main/java/io/whitefox/core/services/IcebergCatalogHandler.java @@ -0,0 +1,72 @@ +package io.whitefox.core.services; + +import io.whitefox.core.Metastore; +import io.whitefox.core.MetastoreProperties; +import io.whitefox.core.Storage; +import java.io.IOException; +import java.util.Map; +import org.apache.iceberg.BaseMetastoreCatalog; +import org.apache.iceberg.CatalogProperties; +import org.apache.iceberg.Table; +import org.apache.iceberg.aws.glue.GlueCatalog; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.NoSuchTableException; +import org.apache.iceberg.hadoop.HadoopCatalog; + +public class IcebergCatalogHandler { + + private final AwsGlueConfigBuilder awsGlueConfigBuilder; + + public IcebergCatalogHandler(AwsGlueConfigBuilder awsGlueConfigBuilder) { + this.awsGlueConfigBuilder = awsGlueConfigBuilder; + } + + public Table loadTableWithGlueCatalog( + Metastore metastore, + Storage storage, + TableIdentifier tableIdentifier, + HadoopConfigBuilder hadoopConfigBuilder) { + try (var catalog = new GlueCatalog()) { + catalog.setConf(hadoopConfigBuilder.buildConfig(storage)); + catalog.initialize( + metastore.name(), + awsGlueConfigBuilder.buildConfig( + (MetastoreProperties.GlueMetastoreProperties) metastore.properties())); + return loadTable(catalog, tableIdentifier); + } catch (IOException e) { + throw new RuntimeException("Unexpected error when closing the Glue catalog", e); + } + } + + public Table loadTableWithHadoopCatalog( + Metastore metastore, + Storage storage, + TableIdentifier tableIdentifier, + HadoopConfigBuilder hadoopConfigBuilder) { + try (var catalog = new HadoopCatalog()) { + catalog.setConf(hadoopConfigBuilder.buildConfig(storage)); + catalog.initialize( + metastore.name(), + Map.of( + CatalogProperties.WAREHOUSE_LOCATION, + ((MetastoreProperties.HadoopMetastoreProperties) metastore.properties()).location())); + return loadTable(catalog, tableIdentifier); + } catch (IOException e) { + throw 
new RuntimeException("Unexpected error when closing the Hadoop catalog", e); + } + } + + private Table loadTable(BaseMetastoreCatalog catalog, TableIdentifier tableIdentifier) { + try { + return catalog.loadTable(tableIdentifier); + } catch (NoSuchTableException e) { + throw new IllegalArgumentException(String.format( + "Cannot found iceberg table [%s] under namespace [%s]", + tableIdentifier.name(), tableIdentifier.namespace())); + } catch (Throwable e) { + throw new RuntimeException(String.format( + "Unexpected exception when loading the iceberg table [%s] under namespace [%s]", + tableIdentifier.name(), tableIdentifier.namespace())); + } + } +} diff --git a/server/core/src/main/java/io/whitefox/core/services/IcebergSharedTable.java b/server/core/src/main/java/io/whitefox/core/services/IcebergSharedTable.java index 8afcb368d..a5efce6f3 100644 --- a/server/core/src/main/java/io/whitefox/core/services/IcebergSharedTable.java +++ b/server/core/src/main/java/io/whitefox/core/services/IcebergSharedTable.java @@ -1,21 +1,11 @@ package io.whitefox.core.services; -import io.whitefox.core.*; -import io.whitefox.core.aws.utils.StaticCredentialsProvider; -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; +import io.whitefox.core.Metadata; +import io.whitefox.core.ReadTableRequest; +import io.whitefox.core.ReadTableResultToBeSigned; import java.util.Optional; import org.apache.commons.lang3.NotImplementedException; -import org.apache.iceberg.BaseMetastoreCatalog; -import org.apache.iceberg.CatalogProperties; import org.apache.iceberg.Table; -import org.apache.iceberg.aws.AwsClientProperties; -import org.apache.iceberg.aws.AwsProperties; -import org.apache.iceberg.aws.glue.GlueCatalog; -import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.exceptions.NoSuchTableException; -import org.apache.iceberg.hadoop.HadoopCatalog; public class IcebergSharedTable implements AbstractSharedTable { @@ -28,104 +18,12 @@ private 
IcebergSharedTable(Table icebergTable, TableSchemaConverter tableSchemaC } public static IcebergSharedTable of( - SharedTable tableDetails, - TableSchemaConverter tableSchemaConverter, - HadoopConfigBuilder hadoopConfigBuilder) { - - if (tableDetails.internalTable().properties() instanceof InternalTable.IcebergTableProperties) { - var metastore = getMetastore(tableDetails.internalTable()); - var tableId = getTableIdentifier(tableDetails.internalTable()); - var table = loadTable( - metastore, - tableDetails.internalTable().provider().storage(), - tableId, - hadoopConfigBuilder); - return new IcebergSharedTable(table, tableSchemaConverter); - } else { - throw new IllegalArgumentException( - String.format("%s is not an iceberg table", tableDetails.name())); - } - } - - private static TableIdentifier getTableIdentifier(InternalTable internalTable) { - var icebergTableProperties = - ((InternalTable.IcebergTableProperties) internalTable.properties()); - return TableIdentifier.of( - icebergTableProperties.databaseName(), icebergTableProperties.tableName()); - } - - private static Metastore getMetastore(InternalTable internalTable) { - return internalTable - .provider() - .metastore() - .orElseThrow(() -> new RuntimeException( - String.format("missing metastore for the iceberg table: [%s]", internalTable.name()))); - } - - private static Table loadTable( - Metastore metastore, - Storage storage, - TableIdentifier tableIdentifier, - HadoopConfigBuilder hadoopConfigBuilder) { - if (metastore.type() == MetastoreType.GLUE) { - return loadTableWithGlueCatalog(metastore, storage, tableIdentifier, hadoopConfigBuilder); - } else if (metastore.type() == MetastoreType.HADOOP) { - return loadTableWithHadoopCatalog(metastore, storage, tableIdentifier, hadoopConfigBuilder); - } else { - throw new RuntimeException(String.format("Unknown metastore type: [%s]", metastore.type())); - } + Table icebergTable, TableSchemaConverter tableSchemaConverter) { + return new 
IcebergSharedTable(icebergTable, tableSchemaConverter); } - private static Table loadTableWithGlueCatalog( - Metastore metastore, - Storage storage, - TableIdentifier tableIdentifier, - HadoopConfigBuilder hadoopConfigBuilder) { - try (var catalog = new GlueCatalog()) { - catalog.setConf(hadoopConfigBuilder.buildConfig(storage)); - catalog.initialize( - metastore.name(), - setGlueProperties((MetastoreProperties.GlueMetastoreProperties) metastore.properties())); - return loadTable(catalog, tableIdentifier); - } catch (IOException e) { - throw new RuntimeException("Unexpected error when closing the Glue catalog", e); - } - } - - private static Table loadTableWithHadoopCatalog( - Metastore metastore, - Storage storage, - TableIdentifier tableIdentifier, - HadoopConfigBuilder hadoopConfigBuilder) { - try (var catalog = new HadoopCatalog()) { - catalog.setConf(hadoopConfigBuilder.buildConfig(storage)); - catalog.initialize( - metastore.name(), - Map.of( - CatalogProperties.WAREHOUSE_LOCATION, - ((MetastoreProperties.HadoopMetastoreProperties) metastore.properties()).location())); - return loadTable(catalog, tableIdentifier); - } catch (IOException e) { - throw new RuntimeException("Unexpected error when closing the Hadoop catalog", e); - } - } - - private static Table loadTable(BaseMetastoreCatalog catalog, TableIdentifier tableIdentifier) { - try { - return catalog.loadTable(tableIdentifier); - } catch (NoSuchTableException e) { - throw new IllegalArgumentException(String.format( - "Cannot found iceberg table [%s] under namespace [%s]", - tableIdentifier.name(), tableIdentifier.namespace())); - } catch (Throwable e) { - throw new RuntimeException(String.format( - "Unexpected exception when loading the iceberg table [%s] under namespace [%s]", - tableIdentifier.name(), tableIdentifier.namespace())); - } - } - - public static IcebergSharedTable of(SharedTable sharedTable) { - return of(sharedTable, new TableSchemaConverter(), new HadoopConfigBuilder()); + public static 
IcebergSharedTable of(Table icebergTable) { + return new IcebergSharedTable(icebergTable, new TableSchemaConverter()); } public Optional getMetadata(Optional startingTimestamp) { @@ -141,24 +39,4 @@ public Optional getTableVersion(Optional startingTimestamp) { public ReadTableResultToBeSigned queryTable(ReadTableRequest readTableRequest) { throw new NotImplementedException(); } - - private static Map setGlueProperties( - MetastoreProperties.GlueMetastoreProperties glueMetastoreProperties) { - AwsCredentials.SimpleAwsCredentials credentials = - (AwsCredentials.SimpleAwsCredentials) glueMetastoreProperties.credentials(); - Map properties = new HashMap<>(); - properties.put(CatalogProperties.CATALOG_IMPL, "org.apache.iceberg.aws.glue.GlueCatalog"); - properties.put(CatalogProperties.FILE_IO_IMPL, "org.apache.iceberg.aws.s3.S3FileIO"); - properties.put(AwsProperties.GLUE_CATALOG_ID, glueMetastoreProperties.catalogId()); - properties.put(AwsClientProperties.CLIENT_REGION, credentials.region()); - properties.put( - AwsClientProperties.CLIENT_CREDENTIALS_PROVIDER, StaticCredentialsProvider.class.getName()); - properties.put( - String.format("%s.%s", AwsClientProperties.CLIENT_CREDENTIALS_PROVIDER, "accessKeyId"), - credentials.awsAccessKeyId()); - properties.put( - String.format("%s.%s", AwsClientProperties.CLIENT_CREDENTIALS_PROVIDER, "secretAccessKey"), - credentials.awsSecretAccessKey()); - return properties; - } } diff --git a/server/core/src/main/java/io/whitefox/core/services/IcebergTableLoader.java b/server/core/src/main/java/io/whitefox/core/services/IcebergTableLoader.java index 535fcfdcf..63b6240ae 100644 --- a/server/core/src/main/java/io/whitefox/core/services/IcebergTableLoader.java +++ b/server/core/src/main/java/io/whitefox/core/services/IcebergTableLoader.java @@ -1,13 +1,57 @@ package io.whitefox.core.services; -import io.whitefox.core.SharedTable; -import jakarta.enterprise.context.ApplicationScoped; +import io.whitefox.core.*; +import 
org.apache.iceberg.catalog.TableIdentifier; -@ApplicationScoped public class IcebergTableLoader implements TableLoader { + private final IcebergCatalogHandler icebergCatalogHandler; + private final HadoopConfigBuilder hadoopConfigBuilder; + + public IcebergTableLoader( + IcebergCatalogHandler icebergCatalogHandler, HadoopConfigBuilder hadoopConfigBuilder) { + this.icebergCatalogHandler = icebergCatalogHandler; + this.hadoopConfigBuilder = hadoopConfigBuilder; + } + @Override public IcebergSharedTable loadTable(SharedTable sharedTable) { - return IcebergSharedTable.of(sharedTable); + if (sharedTable.internalTable().properties() instanceof InternalTable.IcebergTableProperties) { + var metastore = getMetastore(sharedTable.internalTable()); + var tableId = getTableIdentifier(sharedTable.internalTable()); + if (metastore.type() == MetastoreType.GLUE) { + return IcebergSharedTable.of(icebergCatalogHandler.loadTableWithGlueCatalog( + metastore, + sharedTable.internalTable().provider().storage(), + tableId, + hadoopConfigBuilder)); + } else if (metastore.type() == MetastoreType.HADOOP) { + return IcebergSharedTable.of(icebergCatalogHandler.loadTableWithHadoopCatalog( + metastore, + sharedTable.internalTable().provider().storage(), + tableId, + hadoopConfigBuilder)); + } else { + throw new RuntimeException(String.format("Unknown metastore type: [%s]", metastore.type())); + } + } else { + throw new IllegalArgumentException( + String.format("%s is not an iceberg table", sharedTable.name())); + } + } + + private TableIdentifier getTableIdentifier(InternalTable internalTable) { + var icebergTableProperties = + ((InternalTable.IcebergTableProperties) internalTable.properties()); + return TableIdentifier.of( + icebergTableProperties.databaseName(), icebergTableProperties.tableName()); + } + + private Metastore getMetastore(InternalTable internalTable) { + return internalTable + .provider() + .metastore() + .orElseThrow(() -> new RuntimeException( + String.format("missing 
metastore for the iceberg table: [%s]", internalTable.name()))); } } diff --git a/server/core/src/main/java/io/whitefox/core/services/TableLoaderFactoryImpl.java b/server/core/src/main/java/io/whitefox/core/services/TableLoaderFactoryImpl.java index 92daef652..69c1f7ddf 100644 --- a/server/core/src/main/java/io/whitefox/core/services/TableLoaderFactoryImpl.java +++ b/server/core/src/main/java/io/whitefox/core/services/TableLoaderFactoryImpl.java @@ -11,7 +11,8 @@ public TableLoader newTableLoader(InternalTable internalTable) { if (internalTable.properties() instanceof InternalTable.DeltaTableProperties) { return new DeltaShareTableLoader(); } else if (internalTable.properties() instanceof InternalTable.IcebergTableProperties) { - return new IcebergTableLoader(); + return new IcebergTableLoader( + new IcebergCatalogHandler(new AwsGlueConfigBuilder()), new HadoopConfigBuilder()); } else throw new RuntimeException(String.format("unknown table [%s]", internalTable.name())); } } diff --git a/server/core/src/test/java/io/whitefox/core/services/IcebergTableLoaderTest.java b/server/core/src/test/java/io/whitefox/core/services/IcebergTableLoaderTest.java index 49a7ff915..2c7bd2fb1 100644 --- a/server/core/src/test/java/io/whitefox/core/services/IcebergTableLoaderTest.java +++ b/server/core/src/test/java/io/whitefox/core/services/IcebergTableLoaderTest.java @@ -14,7 +14,8 @@ public class IcebergTableLoaderTest { - private final IcebergTableLoader icebergTableLoader = new IcebergTableLoader(); + private final IcebergTableLoader icebergTableLoader = new IcebergTableLoader( + new IcebergCatalogHandler(new AwsGlueConfigBuilder()), new HadoopConfigBuilder()); private final S3TestConfig s3TestConfig = S3TestConfig.loadFromEnv(); private final AwsGlueTestConfig awsGlueTestConfig = AwsGlueTestConfig.loadFromEnv(); From 07239074a212018b05f28571997ea2db877ebb43 Mon Sep 17 00:00:00 2001 From: Marco Scalzo Date: Tue, 23 Jan 2024 21:18:38 +0100 Subject: [PATCH 17/20] move 
HadoopConfigBuilder into IcebergCatalogHandler --- .../core/services/IcebergCatalogHandler.java | 16 +++++++-------- .../core/services/IcebergTableLoader.java | 20 +++++++------------ .../core/services/TableLoaderFactoryImpl.java | 2 +- .../core/services/IcebergTableLoaderTest.java | 2 +- 4 files changed, 16 insertions(+), 24 deletions(-) diff --git a/server/core/src/main/java/io/whitefox/core/services/IcebergCatalogHandler.java b/server/core/src/main/java/io/whitefox/core/services/IcebergCatalogHandler.java index 4ded3166f..e39cbe487 100644 --- a/server/core/src/main/java/io/whitefox/core/services/IcebergCatalogHandler.java +++ b/server/core/src/main/java/io/whitefox/core/services/IcebergCatalogHandler.java @@ -17,15 +17,16 @@ public class IcebergCatalogHandler { private final AwsGlueConfigBuilder awsGlueConfigBuilder; - public IcebergCatalogHandler(AwsGlueConfigBuilder awsGlueConfigBuilder) { + private final HadoopConfigBuilder hadoopConfigBuilder; + + public IcebergCatalogHandler( + AwsGlueConfigBuilder awsGlueConfigBuilder, HadoopConfigBuilder hadoopConfigBuilder) { this.awsGlueConfigBuilder = awsGlueConfigBuilder; + this.hadoopConfigBuilder = hadoopConfigBuilder; } public Table loadTableWithGlueCatalog( - Metastore metastore, - Storage storage, - TableIdentifier tableIdentifier, - HadoopConfigBuilder hadoopConfigBuilder) { + Metastore metastore, Storage storage, TableIdentifier tableIdentifier) { try (var catalog = new GlueCatalog()) { catalog.setConf(hadoopConfigBuilder.buildConfig(storage)); catalog.initialize( @@ -39,10 +40,7 @@ public Table loadTableWithGlueCatalog( } public Table loadTableWithHadoopCatalog( - Metastore metastore, - Storage storage, - TableIdentifier tableIdentifier, - HadoopConfigBuilder hadoopConfigBuilder) { + Metastore metastore, Storage storage, TableIdentifier tableIdentifier) { try (var catalog = new HadoopCatalog()) { catalog.setConf(hadoopConfigBuilder.buildConfig(storage)); catalog.initialize( diff --git 
a/server/core/src/main/java/io/whitefox/core/services/IcebergTableLoader.java b/server/core/src/main/java/io/whitefox/core/services/IcebergTableLoader.java index 63b6240ae..800b80f57 100644 --- a/server/core/src/main/java/io/whitefox/core/services/IcebergTableLoader.java +++ b/server/core/src/main/java/io/whitefox/core/services/IcebergTableLoader.java @@ -1,17 +1,17 @@ package io.whitefox.core.services; -import io.whitefox.core.*; +import io.whitefox.core.InternalTable; +import io.whitefox.core.Metastore; +import io.whitefox.core.MetastoreType; +import io.whitefox.core.SharedTable; import org.apache.iceberg.catalog.TableIdentifier; public class IcebergTableLoader implements TableLoader { private final IcebergCatalogHandler icebergCatalogHandler; - private final HadoopConfigBuilder hadoopConfigBuilder; - public IcebergTableLoader( - IcebergCatalogHandler icebergCatalogHandler, HadoopConfigBuilder hadoopConfigBuilder) { + public IcebergTableLoader(IcebergCatalogHandler icebergCatalogHandler) { this.icebergCatalogHandler = icebergCatalogHandler; - this.hadoopConfigBuilder = hadoopConfigBuilder; } @Override @@ -21,16 +21,10 @@ public IcebergSharedTable loadTable(SharedTable sharedTable) { var tableId = getTableIdentifier(sharedTable.internalTable()); if (metastore.type() == MetastoreType.GLUE) { return IcebergSharedTable.of(icebergCatalogHandler.loadTableWithGlueCatalog( - metastore, - sharedTable.internalTable().provider().storage(), - tableId, - hadoopConfigBuilder)); + metastore, sharedTable.internalTable().provider().storage(), tableId)); } else if (metastore.type() == MetastoreType.HADOOP) { return IcebergSharedTable.of(icebergCatalogHandler.loadTableWithHadoopCatalog( - metastore, - sharedTable.internalTable().provider().storage(), - tableId, - hadoopConfigBuilder)); + metastore, sharedTable.internalTable().provider().storage(), tableId)); } else { throw new RuntimeException(String.format("Unknown metastore type: [%s]", metastore.type())); } diff --git 
a/server/core/src/main/java/io/whitefox/core/services/TableLoaderFactoryImpl.java b/server/core/src/main/java/io/whitefox/core/services/TableLoaderFactoryImpl.java index 69c1f7ddf..379c98ae6 100644 --- a/server/core/src/main/java/io/whitefox/core/services/TableLoaderFactoryImpl.java +++ b/server/core/src/main/java/io/whitefox/core/services/TableLoaderFactoryImpl.java @@ -12,7 +12,7 @@ public TableLoader newTableLoader(InternalTable internalTable) { return new DeltaShareTableLoader(); } else if (internalTable.properties() instanceof InternalTable.IcebergTableProperties) { return new IcebergTableLoader( - new IcebergCatalogHandler(new AwsGlueConfigBuilder()), new HadoopConfigBuilder()); + new IcebergCatalogHandler(new AwsGlueConfigBuilder(), new HadoopConfigBuilder())); } else throw new RuntimeException(String.format("unknown table [%s]", internalTable.name())); } } diff --git a/server/core/src/test/java/io/whitefox/core/services/IcebergTableLoaderTest.java b/server/core/src/test/java/io/whitefox/core/services/IcebergTableLoaderTest.java index 2c7bd2fb1..e3633bfb0 100644 --- a/server/core/src/test/java/io/whitefox/core/services/IcebergTableLoaderTest.java +++ b/server/core/src/test/java/io/whitefox/core/services/IcebergTableLoaderTest.java @@ -15,7 +15,7 @@ public class IcebergTableLoaderTest { private final IcebergTableLoader icebergTableLoader = new IcebergTableLoader( - new IcebergCatalogHandler(new AwsGlueConfigBuilder()), new HadoopConfigBuilder()); + new IcebergCatalogHandler(new AwsGlueConfigBuilder(), new HadoopConfigBuilder())); private final S3TestConfig s3TestConfig = S3TestConfig.loadFromEnv(); private final AwsGlueTestConfig awsGlueTestConfig = AwsGlueTestConfig.loadFromEnv(); From c3040af37fad3b9b7ae0374fc501509a7ce18050 Mon Sep 17 00:00:00 2001 From: Marco Scalzo Date: Wed, 24 Jan 2024 10:22:30 +0100 Subject: [PATCH 18/20] improve error handling --- .../io/whitefox/core/MetastoreProperties.java | 2 +- .../aws/utils/StaticCredentialsProvider.java | 4 +- 
.../core/services/AwsGlueConfigBuilder.java | 39 +++++++++++-------- .../core/services/IcebergCatalogHandler.java | 2 +- .../core/services/IcebergTableLoader.java | 3 +- 5 files changed, 29 insertions(+), 21 deletions(-) diff --git a/server/core/src/main/java/io/whitefox/core/MetastoreProperties.java b/server/core/src/main/java/io/whitefox/core/MetastoreProperties.java index 425befaf5..91c7fc5bf 100644 --- a/server/core/src/main/java/io/whitefox/core/MetastoreProperties.java +++ b/server/core/src/main/java/io/whitefox/core/MetastoreProperties.java @@ -58,7 +58,7 @@ final class HadoopMetastoreProperties implements MetastoreProperties { public HadoopMetastoreProperties(String location, MetastoreType type) { if (type != MetastoreType.HADOOP) { throw new IllegalArgumentException(String.format( - "GlueMetastore properties are not compatible with metastore of type %o", type)); + "Hadoop metastore properties are not compatible with metastore of type %s", type)); } this.location = location; } diff --git a/server/core/src/main/java/io/whitefox/core/aws/utils/StaticCredentialsProvider.java b/server/core/src/main/java/io/whitefox/core/aws/utils/StaticCredentialsProvider.java index fa4bae221..35daf1040 100644 --- a/server/core/src/main/java/io/whitefox/core/aws/utils/StaticCredentialsProvider.java +++ b/server/core/src/main/java/io/whitefox/core/aws/utils/StaticCredentialsProvider.java @@ -16,9 +16,9 @@ public static AwsCredentialsProvider create(Map properties) { private static AwsCredentials retrieveCredentials(Map properties) { if (!properties.containsKey("accessKeyId")) { - throw new RuntimeException("accessKeyId not found"); + throw new IllegalArgumentException("accessKeyId not found"); } else if (!properties.containsKey("secretAccessKey")) { - throw new RuntimeException("secretAccessKey not found"); + throw new IllegalArgumentException("secretAccessKey not found"); } return AwsBasicCredentials.create( properties.get("accessKeyId"), properties.get("secretAccessKey"));
diff --git a/server/core/src/main/java/io/whitefox/core/services/AwsGlueConfigBuilder.java b/server/core/src/main/java/io/whitefox/core/services/AwsGlueConfigBuilder.java index 46de4037f..7f1b82e5a 100644 --- a/server/core/src/main/java/io/whitefox/core/services/AwsGlueConfigBuilder.java +++ b/server/core/src/main/java/io/whitefox/core/services/AwsGlueConfigBuilder.java @@ -13,21 +13,28 @@ public class AwsGlueConfigBuilder { public Map buildConfig( MetastoreProperties.GlueMetastoreProperties glueMetastoreProperties) { - AwsCredentials.SimpleAwsCredentials credentials = - (AwsCredentials.SimpleAwsCredentials) glueMetastoreProperties.credentials(); - Map config = new HashMap<>(); - config.put(CatalogProperties.CATALOG_IMPL, "org.apache.iceberg.aws.glue.GlueCatalog"); - config.put(CatalogProperties.FILE_IO_IMPL, "org.apache.iceberg.aws.s3.S3FileIO"); - config.put(AwsProperties.GLUE_CATALOG_ID, glueMetastoreProperties.catalogId()); - config.put(AwsClientProperties.CLIENT_REGION, credentials.region()); - config.put( - AwsClientProperties.CLIENT_CREDENTIALS_PROVIDER, StaticCredentialsProvider.class.getName()); - config.put( - String.format("%s.%s", AwsClientProperties.CLIENT_CREDENTIALS_PROVIDER, "accessKeyId"), - credentials.awsAccessKeyId()); - config.put( - String.format("%s.%s", AwsClientProperties.CLIENT_CREDENTIALS_PROVIDER, "secretAccessKey"), - credentials.awsSecretAccessKey()); - return config; + try { + AwsCredentials.SimpleAwsCredentials credentials = + (AwsCredentials.SimpleAwsCredentials) glueMetastoreProperties.credentials(); + Map config = new HashMap<>(); + config.put(CatalogProperties.CATALOG_IMPL, "org.apache.iceberg.aws.glue.GlueCatalog"); + config.put(CatalogProperties.FILE_IO_IMPL, "org.apache.iceberg.aws.s3.S3FileIO"); + config.put(AwsProperties.GLUE_CATALOG_ID, glueMetastoreProperties.catalogId()); + config.put(AwsClientProperties.CLIENT_REGION, credentials.region()); + config.put( + AwsClientProperties.CLIENT_CREDENTIALS_PROVIDER, + 
StaticCredentialsProvider.class.getName()); + config.put( + String.format("%s.%s", AwsClientProperties.CLIENT_CREDENTIALS_PROVIDER, "accessKeyId"), + credentials.awsAccessKeyId()); + config.put( + String.format( + "%s.%s", AwsClientProperties.CLIENT_CREDENTIALS_PROVIDER, "secretAccessKey"), + credentials.awsSecretAccessKey()); + return config; + } catch (Exception e) { + throw new IllegalArgumentException(String.format( + "Credentials not found on GlueMetastoreProperties [%s]", glueMetastoreProperties)); + } } } diff --git a/server/core/src/main/java/io/whitefox/core/services/IcebergCatalogHandler.java b/server/core/src/main/java/io/whitefox/core/services/IcebergCatalogHandler.java index e39cbe487..df7698623 100644 --- a/server/core/src/main/java/io/whitefox/core/services/IcebergCatalogHandler.java +++ b/server/core/src/main/java/io/whitefox/core/services/IcebergCatalogHandler.java @@ -59,7 +59,7 @@ private Table loadTable(BaseMetastoreCatalog catalog, TableIdentifier tableIdent return catalog.loadTable(tableIdentifier); } catch (NoSuchTableException e) { throw new IllegalArgumentException(String.format( - "Cannot found iceberg table [%s] under namespace [%s]", + "Cannot find iceberg table [%s] under namespace [%s]", tableIdentifier.name(), tableIdentifier.namespace())); } catch (Throwable e) { throw new RuntimeException(String.format( diff --git a/server/core/src/main/java/io/whitefox/core/services/IcebergTableLoader.java b/server/core/src/main/java/io/whitefox/core/services/IcebergTableLoader.java index 800b80f57..640c13547 100644 --- a/server/core/src/main/java/io/whitefox/core/services/IcebergTableLoader.java +++ b/server/core/src/main/java/io/whitefox/core/services/IcebergTableLoader.java @@ -26,7 +26,8 @@ public IcebergSharedTable loadTable(SharedTable sharedTable) { return IcebergSharedTable.of(icebergCatalogHandler.loadTableWithHadoopCatalog( metastore, sharedTable.internalTable().provider().storage(), tableId)); } else { - throw new 
RuntimeException(String.format("Unknown metastore type: [%s]", metastore.type())); + throw new RuntimeException(String.format( + "Metastore type: [%s] not compatible with Iceberg table", metastore.type())); } } else { throw new IllegalArgumentException( From 4f3b50ed5a454eb135794a99f3c8e9d70ac28412 Mon Sep 17 00:00:00 2001 From: Marco Scalzo Date: Wed, 24 Jan 2024 11:39:19 +0100 Subject: [PATCH 19/20] fix class cast check --- .../io/whitefox/core/services/AwsGlueConfigBuilder.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server/core/src/main/java/io/whitefox/core/services/AwsGlueConfigBuilder.java b/server/core/src/main/java/io/whitefox/core/services/AwsGlueConfigBuilder.java index 7f1b82e5a..3f788e5de 100644 --- a/server/core/src/main/java/io/whitefox/core/services/AwsGlueConfigBuilder.java +++ b/server/core/src/main/java/io/whitefox/core/services/AwsGlueConfigBuilder.java @@ -13,7 +13,7 @@ public class AwsGlueConfigBuilder { public Map buildConfig( MetastoreProperties.GlueMetastoreProperties glueMetastoreProperties) { - try { + if (glueMetastoreProperties.credentials() instanceof AwsCredentials.SimpleAwsCredentials) { AwsCredentials.SimpleAwsCredentials credentials = (AwsCredentials.SimpleAwsCredentials) glueMetastoreProperties.credentials(); Map config = new HashMap<>(); @@ -32,9 +32,9 @@ public Map buildConfig( "%s.%s", AwsClientProperties.CLIENT_CREDENTIALS_PROVIDER, "secretAccessKey"), credentials.awsSecretAccessKey()); return config; - } catch (Exception e) { + } else { throw new IllegalArgumentException(String.format( - "Credentials not found on GlueMetastoreProperties [%s]", glueMetastoreProperties)); + "Credentials type not supported with glue metastore %s", glueMetastoreProperties)); } } } From 393c598359881828e1e9829ab41afbd119ee3b9f Mon Sep 17 00:00:00 2001 From: Marco Scalzo Date: Wed, 24 Jan 2024 11:51:14 +0100 Subject: [PATCH 20/20] rename AbstractSharedTable to InternalSharedTable --- 
.../main/java/io/whitefox/core/services/DeltaSharedTable.java | 2 +- .../main/java/io/whitefox/core/services/IcebergSharedTable.java | 2 +- .../{AbstractSharedTable.java => InternalSharedTable.java} | 2 +- .../src/main/java/io/whitefox/core/services/TableLoader.java | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) rename server/core/src/main/java/io/whitefox/core/services/{AbstractSharedTable.java => InternalSharedTable.java} (91%) diff --git a/server/core/src/main/java/io/whitefox/core/services/DeltaSharedTable.java b/server/core/src/main/java/io/whitefox/core/services/DeltaSharedTable.java index 107eb9e92..0cf6a26ea 100644 --- a/server/core/src/main/java/io/whitefox/core/services/DeltaSharedTable.java +++ b/server/core/src/main/java/io/whitefox/core/services/DeltaSharedTable.java @@ -9,7 +9,7 @@ import java.util.Optional; import java.util.stream.Collectors; -public class DeltaSharedTable implements AbstractSharedTable { +public class DeltaSharedTable implements InternalSharedTable { private final DeltaLog deltaLog; private final TableSchemaConverter tableSchemaConverter; diff --git a/server/core/src/main/java/io/whitefox/core/services/IcebergSharedTable.java b/server/core/src/main/java/io/whitefox/core/services/IcebergSharedTable.java index a5efce6f3..76c515b10 100644 --- a/server/core/src/main/java/io/whitefox/core/services/IcebergSharedTable.java +++ b/server/core/src/main/java/io/whitefox/core/services/IcebergSharedTable.java @@ -7,7 +7,7 @@ import org.apache.commons.lang3.NotImplementedException; import org.apache.iceberg.Table; -public class IcebergSharedTable implements AbstractSharedTable { +public class IcebergSharedTable implements InternalSharedTable { private final Table icebergTable; private final TableSchemaConverter tableSchemaConverter; diff --git a/server/core/src/main/java/io/whitefox/core/services/AbstractSharedTable.java b/server/core/src/main/java/io/whitefox/core/services/InternalSharedTable.java similarity index 91% rename from 
server/core/src/main/java/io/whitefox/core/services/AbstractSharedTable.java rename to server/core/src/main/java/io/whitefox/core/services/InternalSharedTable.java index 2c456576b..5f9904c7f 100644 --- a/server/core/src/main/java/io/whitefox/core/services/AbstractSharedTable.java +++ b/server/core/src/main/java/io/whitefox/core/services/InternalSharedTable.java @@ -5,7 +5,7 @@ import io.whitefox.core.ReadTableResultToBeSigned; import java.util.Optional; -public interface AbstractSharedTable { +public interface InternalSharedTable { Optional getMetadata(Optional startingTimestamp); diff --git a/server/core/src/main/java/io/whitefox/core/services/TableLoader.java b/server/core/src/main/java/io/whitefox/core/services/TableLoader.java index b7efad80d..03131a3e6 100644 --- a/server/core/src/main/java/io/whitefox/core/services/TableLoader.java +++ b/server/core/src/main/java/io/whitefox/core/services/TableLoader.java @@ -4,5 +4,5 @@ public interface TableLoader { - AbstractSharedTable loadTable(SharedTable sharedTable); + InternalSharedTable loadTable(SharedTable sharedTable); }