From 0172b3480ff420d15ccd7bc5b4977cff8b28de68 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Mon, 5 Feb 2024 15:01:15 +0100 Subject: [PATCH 01/22] Initial import of authorizer --- pom.xml | 6 + .../hadoop/HadoopConfigSingleton.java | 12 ++ .../tech/stackable/hadoop/OpaAllowQuery.java | 51 ++++++ .../tech/stackable/hadoop/OpaException.java | 26 +++ .../{OpaQuery.java => OpaGroupsQuery.java} | 14 +- .../tech/stackable/hadoop/OpaQueryUgi.java | 32 ++++ .../StackableAccessControlEnforcer.java | 150 ++++++++++++++++++ .../stackable/hadoop/StackableAuthorizer.java | 32 ++++ .../hadoop/StackableGroupMapper.java | 53 ++++--- test/stack/10-hdfs.yaml | 1 + 10 files changed, 344 insertions(+), 33 deletions(-) create mode 100644 src/main/java/tech/stackable/hadoop/HadoopConfigSingleton.java create mode 100644 src/main/java/tech/stackable/hadoop/OpaAllowQuery.java rename src/main/java/tech/stackable/hadoop/{OpaQuery.java => OpaGroupsQuery.java} (50%) create mode 100644 src/main/java/tech/stackable/hadoop/OpaQueryUgi.java create mode 100644 src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java create mode 100644 src/main/java/tech/stackable/hadoop/StackableAuthorizer.java diff --git a/pom.xml b/pom.xml index a082592..a8d6edc 100644 --- a/pom.xml +++ b/pom.xml @@ -59,6 +59,12 @@ ${hadoop.version} provided + + org.apache.hadoop + hadoop-hdfs + ${hadoop.version} + provided + org.slf4j slf4j-api diff --git a/src/main/java/tech/stackable/hadoop/HadoopConfigSingleton.java b/src/main/java/tech/stackable/hadoop/HadoopConfigSingleton.java new file mode 100644 index 0000000..5419681 --- /dev/null +++ b/src/main/java/tech/stackable/hadoop/HadoopConfigSingleton.java @@ -0,0 +1,12 @@ +package tech.stackable.hadoop; + +import org.apache.hadoop.conf.Configuration; + +public enum HadoopConfigSingleton { + INSTANCE; + private final Configuration configuration = new Configuration(); + + public Configuration getConfiguration() { + return this.configuration; + } +} diff --git a/src/main/java/tech/stackable/hadoop/OpaAllowQuery.java b/src/main/java/tech/stackable/hadoop/OpaAllowQuery.java new file mode 100644 index 0000000..1c6737c --- /dev/null +++ b/src/main/java/tech/stackable/hadoop/OpaAllowQuery.java @@ -0,0 +1,51 @@ +package tech.stackable.hadoop; + +import org.apache.hadoop.hdfs.server.namenode.INodeAttributeProvider; + +public class OpaAllowQuery { + public final OpaAllowQueryInput input; + + public OpaAllowQuery(OpaAllowQueryInput input) { + this.input = input; + } + + public static class OpaAllowQueryInput { + public java.lang.String fsOwner; + public java.lang.String supergroup; + public OpaQueryUgi callerUgi; + public org.apache.hadoop.hdfs.server.namenode.INodeAttributes[] inodeAttrs; + public org.apache.hadoop.hdfs.server.namenode.INode[] inodes; + public byte[][] pathByNameArr; + public int snapshotId; + public java.lang.String path; + public int ancestorIndex; + public boolean doCheckOwner; + public org.apache.hadoop.fs.permission.FsAction ancestorAccess; + public org.apache.hadoop.fs.permission.FsAction parentAccess; + public org.apache.hadoop.fs.permission.FsAction access; + public org.apache.hadoop.fs.permission.FsAction subAccess; + public boolean ignoreEmptyDir; + public java.lang.String operationName; + public org.apache.hadoop.ipc.CallerContext callerContext; + + public OpaAllowQueryInput(INodeAttributeProvider.AuthorizationContext context) { + this.fsOwner = context.getFsOwner(); + this.supergroup = context.getSupergroup(); + this.callerUgi = new 
OpaQueryUgi(context.getCallerUgi()); + this.inodeAttrs = context.getInodeAttrs(); + this.inodes = context.getInodes(); + this.pathByNameArr = context.getPathByNameArr(); + this.snapshotId = context.getSnapshotId(); + this.path = context.getPath(); + this.ancestorIndex = context.getAncestorIndex(); + this.doCheckOwner = context.isDoCheckOwner(); + this.ancestorAccess = context.getAncestorAccess(); + this.parentAccess = context.getParentAccess(); + this.access = context.getAccess(); + this.subAccess = context.getSubAccess(); + this.ignoreEmptyDir = context.isIgnoreEmptyDir(); + this.operationName = context.getOperationName(); + this.callerContext = context.getCallerContext(); + } + } +} diff --git a/src/main/java/tech/stackable/hadoop/OpaException.java b/src/main/java/tech/stackable/hadoop/OpaException.java index 0aaacc8..01f2f08 100644 --- a/src/main/java/tech/stackable/hadoop/OpaException.java +++ b/src/main/java/tech/stackable/hadoop/OpaException.java @@ -11,6 +11,15 @@ protected OpaException(String message, Throwable cause) { super(message, cause); } + public static final class UriMissing extends OpaException { + public UriMissing(String configuration) { + super("No Open Policy Agent URI provided (must be set in the configuration \"" + + configuration + + "\")", + null); + } + } + public static final class UriInvalid extends OpaException { public UriInvalid(URI uri, Throwable cause) { super( @@ -32,6 +41,11 @@ public EndPointNotFound(String url) { null); } } + public static final class QueryFailed extends OpaException { + public QueryFailed(Throwable cause) { + super("Failed to query OPA backend", cause); + } + } public static final class OpaServerError extends OpaException { public OpaServerError(String query, HttpResponse response) { @@ -45,4 +59,16 @@ public OpaServerError(String query, HttpResponse response) { null); } } + + public static final class SerializeFailed extends OpaException { + public SerializeFailed(Throwable cause) { + super("Failed to serialize OPA query context", cause); + } + } + + public static final class DeserializeFailed extends OpaException { + public DeserializeFailed(Throwable cause) { + super("Failed to deserialize OPA policy response", cause); + } + } } diff --git a/src/main/java/tech/stackable/hadoop/OpaQuery.java b/src/main/java/tech/stackable/hadoop/OpaGroupsQuery.java similarity index 50% rename from src/main/java/tech/stackable/hadoop/OpaQuery.java rename to src/main/java/tech/stackable/hadoop/OpaGroupsQuery.java index a65472b..506973a 100644 --- a/src/main/java/tech/stackable/hadoop/OpaQuery.java +++ b/src/main/java/tech/stackable/hadoop/OpaGroupsQuery.java @@ -2,30 +2,30 @@ import java.util.StringJoiner; -public class OpaQuery { - public final OpaQueryInput input; +public class OpaGroupsQuery { + public final OpaGroupsQueryInput input; - public OpaQuery(OpaQueryInput input) { + public OpaGroupsQuery(OpaGroupsQueryInput input) { this.input = input; } @Override public String toString() { - return new StringJoiner(", ", OpaQuery.class.getSimpleName() + "[", "]") + return new StringJoiner(", ", OpaGroupsQuery.class.getSimpleName() + "[", "]") .add("input=" + input) .toString(); } - public static class OpaQueryInput { + public static class OpaGroupsQueryInput { public final String username; - public OpaQueryInput(String user) { + public OpaGroupsQueryInput(String user) { this.username = user; } @Override public String toString() { - return new StringJoiner(", ", OpaQueryInput.class.getSimpleName() + "[", "]") + return new StringJoiner(", ", 
OpaGroupsQueryInput.class.getSimpleName() + "[", "]") .add("username='" + username + "'") .toString(); } diff --git a/src/main/java/tech/stackable/hadoop/OpaQueryUgi.java b/src/main/java/tech/stackable/hadoop/OpaQueryUgi.java new file mode 100644 index 0000000..a3e5c3b --- /dev/null +++ b/src/main/java/tech/stackable/hadoop/OpaQueryUgi.java @@ -0,0 +1,32 @@ +package tech.stackable.hadoop; + +import org.apache.hadoop.security.UserGroupInformation; + +import java.io.IOException; +import java.util.List; + +public class OpaQueryUgi { + public UserGroupInformation realUser; + public String userName; + public String shortUserName; + + public String primaryGroup; + public List groups; + + public UserGroupInformation.AuthenticationMethod authenticationMethod; + public UserGroupInformation.AuthenticationMethod realAuthenticationMethod; + + public OpaQueryUgi(UserGroupInformation ugi) { + this.realUser = ugi.getRealUser(); + this.userName = ugi.getUserName(); + this.shortUserName = ugi.getShortUserName(); + try { + this.primaryGroup = ugi.getPrimaryGroupName(); + } catch (IOException e) { + this.primaryGroup = null; + } + this.groups = ugi.getGroups(); + this.authenticationMethod = ugi.getAuthenticationMethod(); + this.realAuthenticationMethod = ugi.getRealAuthenticationMethod(); + } +} diff --git a/src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java b/src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java new file mode 100644 index 0000000..866d3ef --- /dev/null +++ b/src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java @@ -0,0 +1,150 @@ +package tech.stackable.hadoop; + +import com.fasterxml.jackson.annotation.JsonAutoDetect; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.PropertyAccessor; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationFeature; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; +import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo; +import org.apache.hadoop.hdfs.server.namenode.INode; +import org.apache.hadoop.hdfs.server.namenode.INodeAttributeProvider; +import org.apache.hadoop.hdfs.server.namenode.INodeAttributes; +import org.apache.hadoop.security.AccessControlException; +import org.apache.hadoop.security.UserGroupInformation; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.URI; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.util.Objects; + +public class StackableAccessControlEnforcer implements INodeAttributeProvider.AccessControlEnforcer { + + private static final Logger LOG = LoggerFactory.getLogger(StackableAccessControlEnforcer.class); + + public static final String OPA_POLICY_URL_PROP = "hadoop.security.authorization.opa.policy.url"; + + private final HttpClient httpClient = HttpClient.newHttpClient(); + private final ObjectMapper json; + private URI opaUri; + + public StackableAccessControlEnforcer() { + LOG.info("Starting StackableAccessControlEnforcer"); + + // Guaranteed to be only called once (Effective Java: Item 3) + Configuration configuration = HadoopConfigSingleton.INSTANCE.getConfiguration(); + + String opaPolicyUrl = 
configuration.get(OPA_POLICY_URL_PROP); + if (opaPolicyUrl == null) { + throw new OpaException.UriMissing(OPA_POLICY_URL_PROP); + } + + try { + this.opaUri = URI.create(opaPolicyUrl); + } catch (Exception e) { + throw new OpaException.UriInvalid(opaUri, e); + } + + this.json = new ObjectMapper() + // OPA server can send other fields, such as `decision_id`` when enabling decision logs + // We could add all the fields we *currently* know, but it's more future-proof to ignore any unknown fields + .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) + // Previously we were getting + // Caused by: com.fasterxml.jackson.databind.exc.InvalidDefinitionException: No serializer found for class org.apache.hadoop.hdfs.util.EnumCounters and no properties discovered to create BeanSerializer (to avoid exception, disable SerializationFeature.FAIL_ON_EMPTY_BEANS) (through reference chain: tech.stackable.HdfsOpaAccessControlEnforcer$ContextWrapper["inodeAttrs"]->org.apache.hadoop.hdfs.server.namenode.INodeDirectory[0]->org.apache.hadoop.hdfs.server.namenode.INodeDirectory["features"]->org.apache.hadoop.hdfs.server.namenode.DirectoryWithQuotaFeature[0]->org.apache.hadoop.hdfs.server.namenode.DirectoryWithQuotaFeature["spaceConsumed"]->org.apache.hadoop.hdfs.server.namenode.QuotaCounts["typeSpaces"]) + .configure(SerializationFeature.FAIL_ON_EMPTY_BEANS, false) + // Only include the needed fields. HDFS has many classes with even more circular reference to remove + .setVisibility(PropertyAccessor.ALL, JsonAutoDetect.Visibility.NONE) + .setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.PUBLIC_ONLY) + .setVisibility(PropertyAccessor.GETTER, JsonAutoDetect.Visibility.PUBLIC_ONLY) + // We need to remove some circular pointers (e.g. root -> children[0] -> parent -> root) + // Otherwise we get com.fasterxml.jackson.databind.JsonMappingException: Infinite recursion (StackOverflowError) + .addMixIn(DatanodeDescriptor.class, DatanodeDescriptorMixin.class); + + LOG.info("Started HdfsOpaAccessControlEnforcer"); + } + + private static class OpaQueryResult { + // Boxed Boolean to detect not-present vs explicitly false + public Boolean result; + } + + @Override + public void checkPermission(String fsOwner, String supergroup, + UserGroupInformation ugi, INodeAttributes[] inodeAttrs, + INode[] inodes, byte[][] pathByNameArr, int snapshotId, String path, + int ancestorIndex, boolean doCheckOwner, FsAction ancestorAccess, + FsAction parentAccess, FsAction access, FsAction subAccess, + boolean ignoreEmptyDir) throws AccessControlException { + LOG.info("checkPermission called"); + + // We are using the new "checkPermissionWithContext" API, as indicated by the log statement + // "Use the new authorization provider API". All the calls to this old function only happen when opType == null, + // in which case we have no idea on what to authorize at, so we just allow it. 
+ // FIXME: Needs testing + + // throw new AccessControlException("The HdfsOpaAccessControlEnforcer does not implement the old checkPermission API."); + } + + @Override + public void checkPermissionWithContext(INodeAttributeProvider.AuthorizationContext authzContext) throws AccessControlException { + OpaAllowQuery query = new OpaAllowQuery(new OpaAllowQuery.OpaAllowQueryInput(authzContext)); + + String body; + try { + body = json.writeValueAsString(query); + } catch (JsonProcessingException e) { + throw new OpaException.SerializeFailed(e); + } + + LOG.debug("Request body [{}]", body); + HttpResponse response = null; + try { + response = + httpClient.send( + HttpRequest.newBuilder(opaUri) + .header("Content-Type", "application/json") + .POST(HttpRequest.BodyPublishers.ofString(body)) + .build(), + HttpResponse.BodyHandlers.ofString()); + LOG.debug("Opa response [{}]", response.body()); + } catch (Exception e) { + LOG.error(e.getMessage()); + throw new OpaException.QueryFailed(e); + } + + switch (Objects.requireNonNull(response).statusCode()) { + case 200: + break; + case 404: + throw new OpaException.EndPointNotFound(opaUri.toString()); + default: + throw new OpaException.OpaServerError(query.toString(), response); + } + + OpaQueryResult result; + try { + result = json.readValue(response.body(), OpaQueryResult.class); + } catch (JsonProcessingException e) { + throw new OpaException.DeserializeFailed(e); + } + + if (result.result == null || !result.result) { + throw new AccessControlException("OPA denied the request"); + } + } + + private abstract static class DatanodeDescriptorMixin { + @JsonIgnore + abstract INode getParent(); + @JsonIgnore + abstract DatanodeStorageInfo[] getStorageInfos(); + } +} diff --git a/src/main/java/tech/stackable/hadoop/StackableAuthorizer.java b/src/main/java/tech/stackable/hadoop/StackableAuthorizer.java new file mode 100644 index 0000000..f6de8e7 --- /dev/null +++ b/src/main/java/tech/stackable/hadoop/StackableAuthorizer.java @@ -0,0 +1,32 @@ +package tech.stackable.hadoop; + +import org.apache.hadoop.hdfs.server.namenode.INodeAttributeProvider; +import org.apache.hadoop.hdfs.server.namenode.INodeAttributes; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class StackableAuthorizer extends INodeAttributeProvider { + + private static final Logger LOG = LoggerFactory.getLogger(StackableAuthorizer.class); + + @Override + public void start() { + LOG.info("Starting HdfsOpaAuthorizer"); + } + + @Override + public void stop() { + LOG.info("Stopping HdfsOpaAuthorizer"); + } + + @Override + public INodeAttributes getAttributes(String[] strings, INodeAttributes iNodeAttributes) { + // No special attributes needed + return iNodeAttributes; + } + + @Override + public AccessControlEnforcer getExternalAccessControlEnforcer(AccessControlEnforcer defaultEnforcer) { + return new StackableAccessControlEnforcer(); + } +} diff --git a/src/main/java/tech/stackable/hadoop/StackableGroupMapper.java b/src/main/java/tech/stackable/hadoop/StackableGroupMapper.java index 00a0b7e..ba53b63 100644 --- a/src/main/java/tech/stackable/hadoop/StackableGroupMapper.java +++ b/src/main/java/tech/stackable/hadoop/StackableGroupMapper.java @@ -1,6 +1,7 @@ package tech.stackable.hadoop; import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; import java.io.IOException; @@ -20,6 +21,7 @@ public class 
StackableGroupMapper implements GroupMappingServiceProvider { private static final Logger LOG = LoggerFactory.getLogger(StackableGroupMapper.class); + public static final String OPA_MAPPING_URL_PROP = "hadoop.security.group.mapping.opa.url"; private static final String OPA_MAPPING_GROUP_NAME_PROP = "hadoop.security.group.mapping.opa.list.name"; @@ -31,22 +33,13 @@ public class StackableGroupMapper implements GroupMappingServiceProvider { private final ObjectMapper json; private URI opaUri; - public enum HadoopConfig { - INSTANCE; - private final Configuration configuration = new Configuration(); - - public Configuration getConfiguration() { - return this.configuration; - } - } - public StackableGroupMapper() { - // guaranteed to be only called once (Effective Java: Item 3) - Configuration configuration = HadoopConfig.INSTANCE.getConfiguration(); + // Guaranteed to be only called once (Effective Java: Item 3) + Configuration configuration = HadoopConfigSingleton.INSTANCE.getConfiguration(); String opaMappingUrl = configuration.get(OPA_MAPPING_URL_PROP); if (opaMappingUrl == null) { - throw new RuntimeException("Config \"" + OPA_MAPPING_URL_PROP + "\" missing"); + throw new OpaException.UriMissing(OPA_MAPPING_URL_PROP); } try { @@ -70,7 +63,7 @@ public StackableGroupMapper() { // We could add all the fields we *currently* know, but it's more future-proof to ignore // any unknown fields. .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) - // do not include null values + // Do not include null values .setSerializationInclusion(JsonInclude.Include.NON_NULL); } @@ -81,11 +74,17 @@ public StackableGroupMapper() { * @return list of groups for a given user */ @Override - public List getGroups(String user) throws IOException { + public List getGroups(String user) { LOG.info("Calling StackableGroupMapper.getGroups for user [{}]", user); - OpaQuery query = new OpaQuery(new OpaQuery.OpaQueryInput(user)); - String body = json.writeValueAsString(query); + OpaGroupsQuery query = new OpaGroupsQuery(new OpaGroupsQuery.OpaGroupsQueryInput(user)); + + String body; + try { + body = json.writeValueAsString(query); + } catch (JsonProcessingException e) { + throw new OpaException.SerializeFailed(e); + } LOG.debug("Request body [{}]", body); HttpResponse response = null; @@ -97,9 +96,10 @@ public List getGroups(String user) throws IOException { .POST(HttpRequest.BodyPublishers.ofString(body)) .build(), HttpResponse.BodyHandlers.ofString()); - LOG.info("Opa response [{}]", response.body()); - } catch (InterruptedException e) { + LOG.debug("Opa response [{}]", response.body()); + } catch (Exception e) { LOG.error(e.getMessage()); + throw new OpaException.QueryFailed(e); } switch (Objects.requireNonNull(response).statusCode()) { @@ -111,15 +111,16 @@ public List getGroups(String user) throws IOException { throw new OpaException.OpaServerError(query.toString(), response); } - String responseBody = response.body(); - LOG.debug("Response body [{}]", responseBody); - - @SuppressWarnings("unchecked") - Map result = - (Map) json.readValue(responseBody, HashMap.class).get(OPA_RESULT_FIELD); - List groups = (List) result.get(this.mappingGroupName); + List groups; + try { + @SuppressWarnings("unchecked") + Map result = (Map) json.readValue(response.body(), HashMap.class).get(OPA_RESULT_FIELD); + groups = (List) result.get(this.mappingGroupName); + } catch (Exception e) { + throw new OpaException.DeserializeFailed(e); + } - LOG.info("Groups for [{}]: [{}]", user, groups); + LOG.debug("Groups for [{}]: [{}]", 
user, groups); return groups; } diff --git a/test/stack/10-hdfs.yaml b/test/stack/10-hdfs.yaml index a03961c..52eee90 100644 --- a/test/stack/10-hdfs.yaml +++ b/test/stack/10-hdfs.yaml @@ -27,6 +27,7 @@ spec: image: productVersion: 3.3.6 custom: hdfs + pullPolicy: IfNotPresent clusterConfig: dfsReplication: 1 zookeeperConfigMapName: simple-hdfs-znode From 94fe7d517b5074545e5a33c58aa774de8057ad26 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Tue, 6 Feb 2024 10:20:16 +0100 Subject: [PATCH 02/22] Update test setup --- README.md | 2 +- Tiltfile | 7 +- test/stack/05-opa.yaml | 64 ---------------- test/stack/10-hdfs.yaml | 61 --------------- test/stack/10-opa.yaml | 12 +++ test/stack/11-rego-rules.yaml | 135 ++++++++++++++++++++++++++++++++++ test/stack/20-hdfs.yaml | 68 +++++++++++++++++ 7 files changed, 220 insertions(+), 129 deletions(-) delete mode 100644 test/stack/05-opa.yaml delete mode 100644 test/stack/10-hdfs.yaml create mode 100644 test/stack/10-opa.yaml create mode 100644 test/stack/11-rego-rules.yaml create mode 100644 test/stack/20-hdfs.yaml diff --git a/README.md b/README.md index 8a3a13f..6075aa4 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ Group mappings are resolved on the NameNode and the following configuration shou #### hadoop.security.group.mapping.opa.url -* The Opa Server endpoint e.g. `"http://test-opa.default.svc.cluster.local:8081/v1/data/hdfsgroups"` +* The Opa Server endpoint e.g. `"http://opa.default.svc.cluster.local:8081/v1/data/hdfsgroups"` #### hadoop.security.group.mapping.opa.list.name diff --git a/Tiltfile b/Tiltfile index 63fed7c..a44c8a0 100644 --- a/Tiltfile +++ b/Tiltfile @@ -1,7 +1,8 @@ k8s_yaml('test/stack/01-install-krb5-kdc.yaml') k8s_yaml('test/stack/02-create-kerberos-secretclass.yaml') -k8s_yaml('test/stack/05-opa.yaml') -k8s_yaml('test/stack/10-hdfs.yaml') +k8s_yaml('test/stack/10-opa.yaml') +k8s_yaml('test/stack/11-rego-rules.yaml') +k8s_yaml('test/stack/20-hdfs.yaml') local_resource( 'compile authorizer', @@ -13,4 +14,4 @@ docker_build( './target', dockerfile='./Dockerfile') -k8s_kind('HdfsCluster', image_json_path='{.spec.image.custom}') \ No newline at end of file +k8s_kind('HdfsCluster', image_json_path='{.spec.image.custom}') diff --git a/test/stack/05-opa.yaml b/test/stack/05-opa.yaml deleted file mode 100644 index f4f4315..0000000 --- a/test/stack/05-opa.yaml +++ /dev/null @@ -1,64 +0,0 @@ ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: test - labels: - opa.stackable.tech/bundle: "hdfs-group-mapping" -data: - test.rego: | - package hdfsgroups - - # this will return the group data in this form: - # "result": { - # "groups": [ - # "admin", - # "superuser" - # ] - # ... 
- groups := {g | - raw = users_by_name[input.username].groups[_] - g := trim(raw, "/") - } - - # returning data in the form presented by the UIF - users_by_name := { - "alice": { - "id": "af07f12c-1234-40a7-93e0-874537bdf3f5", - "username": "alice", - "groups": ["/superset-admin"], - "customAttributes": {}, - }, - "bob": { - "id": "af07f12c-2345-40a7-93e0-874537bdf3f5", - "username": "bob", - "groups": ["/admin"], - "customAttributes": {}, - }, - "stackable": { - "id": "af07f12c-3456-40a7-93e0-874537bdf3f5", - "username": "stackable", - "groups": ["/admin", "/superuser"], - "customAttributes": {}, - }, - # Hadoop will use the short-name for group mappings - "nn": { - "id": "af07f12c-7890-40a7-93e0-874537bdf3f5", - "username": "nn", - "groups": ["/admin", "/superuser"], - "customAttributes": {}, - }, - } - ---- -apiVersion: opa.stackable.tech/v1alpha1 -kind: OpaCluster -metadata: - name: test-opa -spec: - image: - productVersion: 0.57.0 - pullPolicy: IfNotPresent - servers: - roleGroups: - default: {} diff --git a/test/stack/10-hdfs.yaml b/test/stack/10-hdfs.yaml deleted file mode 100644 index 52eee90..0000000 --- a/test/stack/10-hdfs.yaml +++ /dev/null @@ -1,61 +0,0 @@ ---- -apiVersion: zookeeper.stackable.tech/v1alpha1 -kind: ZookeeperCluster -metadata: - name: simple-zk -spec: - image: - productVersion: 3.8.3 - servers: - roleGroups: - default: - replicas: 1 ---- -apiVersion: zookeeper.stackable.tech/v1alpha1 -kind: ZookeeperZnode -metadata: - name: simple-hdfs-znode -spec: - clusterRef: - name: simple-zk ---- -apiVersion: hdfs.stackable.tech/v1alpha1 -kind: HdfsCluster -metadata: - name: simple-hdfs -spec: - image: - productVersion: 3.3.6 - custom: hdfs - pullPolicy: IfNotPresent - clusterConfig: - dfsReplication: 1 - zookeeperConfigMapName: simple-hdfs-znode - authentication: - tlsSecretClass: tls - kerberos: - secretClass: kerberos-default - nameNodes: - roleGroups: - default: - envOverrides: - HADOOP_CLASSPATH: "/stackable/hadoop/share/hadoop/tools/lib/*.jar" - configOverrides: - core-site.xml: - # the mapper is only handled on the namenode so no need to apply this config to all roles - hadoop.security.group.mapping: "tech.stackable.hadoop.StackableGroupMapper" - hadoop.security.group.mapping.opa.url: "http://test-opa.default.svc.cluster.local:8081/v1/data/hdfsgroups" - hadoop.security.group.mapping.opa.list.name: "groups" - # The operator adds a default static mapping when kerberos is activated, see: - # https://github.com/stackabletech/hdfs-operator/blob/main/rust/operator-binary/src/kerberos.rs#L97-L101 - # This should be removed so that the mapping implementation can provide this information instead: - hadoop.user.group.static.mapping.overrides: "" - replicas: 2 - dataNodes: - roleGroups: - default: - replicas: 1 - journalNodes: - roleGroups: - default: - replicas: 1 diff --git a/test/stack/10-opa.yaml b/test/stack/10-opa.yaml new file mode 100644 index 0000000..9d3635f --- /dev/null +++ b/test/stack/10-opa.yaml @@ -0,0 +1,12 @@ +--- +apiVersion: opa.stackable.tech/v1alpha1 +kind: OpaCluster +metadata: + name: opa +spec: + image: + productVersion: 0.61.0 # Needed for OPA rego v1 + pullPolicy: IfNotPresent + servers: + roleGroups: + default: {} diff --git a/test/stack/11-rego-rules.yaml b/test/stack/11-rego-rules.yaml new file mode 100644 index 0000000..9ce98ec --- /dev/null +++ b/test/stack/11-rego-rules.yaml @@ -0,0 +1,135 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: hdfs-regorules + labels: + opa.stackable.tech/bundle: "true" +data: + hdfs.rego: | + package hdfs 
+ + import rego.v1 + + default allow = false + + allow if { + some acl in acls + matches_identity(input.callerUgi.shortUserName, acl.identity) + matches_resource(input.path, acl.resource) + action_sufficient_for_operation(acl.action, input.operationName) + } + + # Identity mentions the user explicitly + matches_identity(user, identity) if { + identity == concat("", ["user:", user]) + } + + # Identity mentions group the user is part of + matches_identity(user, identity) if { + some group in groups[user] + identity == concat("", ["group:", group]) + } + + + # Resource mentions the file explicitly + matches_resource(file, resource) if { + resource == concat("", ["hdfs:file:", file]) + } + + # Resource mentions a folder higher up the tree, which will will grant access recursively + matches_resource(file, resource) if { + startswith(resource, "hdfs:dir:/") + # dirs need to have a trailing slash + endswith(resource, "/") + startswith(file, trim_prefix(resource, "hdfs:dir:")) + } + + action_sufficient_for_operation(action, operation) if { + action_hierarchy[action][_] == action_for_operation[operation] + } + + + action_hierarchy := { + "full": ["full", "rw","ro"], + "rw": ["rw", "ro"], + "ro": ["ro"], + } + + action_for_operation := { + "getfileinfo": "ro", + "listStatus": "ro", + "delete": "full", + } + + groups := {"HTTP": ["http-group"]} + + acls := [ + { + "identity": "user:HTTP", + "action": "full", + "resource": "hdfs:file:/hosts", + }, + { + "identity": "group:http-group", + "action": "full", + "resource": "hdfs:dir:/", + }, + { + "identity": "user:HTTP", + "action": "full", + "resource": "hdfs:dir:/ro/full/", + }, + ] + + # hdfs:file:/hosts + # hdfs:file:/hosts/andrew_* + # hdfs:dir:/hosts/ + # kafka:topic:events + # trino:table:tpch.sf1.customers + # trino:schema:tpch.sf1 + # trino:catalog:tpch + + hdfs-groups.rego: | + package hdfs_groups + + # this will return the group data in this form: + # "result": { + # "groups": [ + # "admin", + # "superuser" + # ] + # ... 
+ groups := {g | + raw = users_by_name[input.username].groups[_] + g := trim(raw, "/") + } + + # returning data in the form presented by the UIF + users_by_name := { + "alice": { + "id": "af07f12c-1234-40a7-93e0-874537bdf3f5", + "username": "alice", + "groups": ["/superset-admin"], + "customAttributes": {}, + }, + "bob": { + "id": "af07f12c-2345-40a7-93e0-874537bdf3f5", + "username": "bob", + "groups": ["/admin"], + "customAttributes": {}, + }, + "stackable": { + "id": "af07f12c-3456-40a7-93e0-874537bdf3f5", + "username": "stackable", + "groups": ["/admin", "/superuser"], + "customAttributes": {}, + }, + # Hadoop will use the short-name for group mappings + "nn": { + "id": "af07f12c-7890-40a7-93e0-874537bdf3f5", + "username": "nn", + "groups": ["/admin", "/superuser"], + "customAttributes": {}, + }, + } diff --git a/test/stack/20-hdfs.yaml b/test/stack/20-hdfs.yaml new file mode 100644 index 0000000..df1fee6 --- /dev/null +++ b/test/stack/20-hdfs.yaml @@ -0,0 +1,68 @@ +--- +apiVersion: zookeeper.stackable.tech/v1alpha1 +kind: ZookeeperCluster +metadata: + name: simple-zk +spec: + image: + productVersion: 3.8.3 + servers: + roleGroups: + default: + replicas: 1 +--- +apiVersion: zookeeper.stackable.tech/v1alpha1 +kind: ZookeeperZnode +metadata: + name: simple-hdfs-znode +spec: + clusterRef: + name: simple-zk +--- +apiVersion: hdfs.stackable.tech/v1alpha1 +kind: HdfsCluster +metadata: + name: simple-hdfs +spec: + image: + productVersion: 3.3.6 + custom: hdfs # Will be overwritten by Tilt + pullPolicy: IfNotPresent + clusterConfig: + dfsReplication: 1 + zookeeperConfigMapName: simple-hdfs-znode + authentication: + tlsSecretClass: tls + kerberos: + secretClass: kerberos-default + nameNodes: + envOverrides: &envOverrides + HADOOP_CLASSPATH: "/stackable/hadoop/share/hadoop/tools/lib/*.jar" + configOverrides: &configOverrides + hdfs-site.xml: + dfs.namenode.inode.attributes.provider.class: tech.stackable.hadoop.StackableAuthorizer + core-site.xml: + # The mapper is only handled on the namenode so no need to apply this config to all roles + hadoop.security.group.mapping: tech.stackable.hadoop.StackableGroupMapper + hadoop.security.group.mapping.opa.url: http://opa.default.svc.cluster.local:8081/v1/data/hdfs_groups + hadoop.security.group.mapping.opa.list.name: groups + hadoop.security.authorization.opa.policy.url: http://opa.default.svc.cluster.local:8081/v1/data/hdfs/allow + # The operator adds a default static mapping when kerberos is activated, see: + # https://github.com/stackabletech/hdfs-operator/blob/main/rust/operator-binary/src/kerberos.rs#L97-L101 + # This should be removed so that the mapping implementation can provide this information instead: + hadoop.user.group.static.mapping.overrides: "" + roleGroups: + default: + replicas: 2 + dataNodes: + configOverrides: *configOverrides + envOverrides: *envOverrides + roleGroups: + default: + replicas: 1 + journalNodes: + configOverrides: *configOverrides + envOverrides: *envOverrides + roleGroups: + default: + replicas: 1 From 17fc695c39bf9be482105a0c6d7d53689d6955e6 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Tue, 6 Feb 2024 10:20:33 +0100 Subject: [PATCH 03/22] Add initial rego rules --- rego/hdfs.rego | 88 ++++++++++++++++++++++++++++++ rego/hdfs_test.rego | 130 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 218 insertions(+) create mode 100644 rego/hdfs.rego create mode 100644 rego/hdfs_test.rego diff --git a/rego/hdfs.rego b/rego/hdfs.rego new file mode 100644 index 0000000..107666e --- /dev/null +++ 
b/rego/hdfs.rego @@ -0,0 +1,88 @@ +package hdfs + +import rego.v1 + +default allow = false + +allow if { + some acl in acls + matches_identity(input.callerUgi.shortUserName, acl.identity) + matches_resource(input.path, acl.resource) + action_sufficient_for_operation(acl.action, input.operationName) +} + +# Identity mentions the user explicitly +matches_identity(user, identity) if { + identity == concat("", ["user:", user]) +} + +# Identity mentions group the user is part of +matches_identity(user, identity) if { + some group in groups[user] + identity == concat("", ["group:", group]) +} + + +# Resource mentions the file explicitly +matches_resource(file, resource) if { + resource == concat("", ["hdfs:file:", file]) +} + +# Resource mentions a folder higher up the tree, which will will grant access recursively +matches_resource(file, resource) if { + startswith(resource, "hdfs:dir:/") + # dirs need to have a trailing slash + endswith(resource, "/") + startswith(file, trim_prefix(resource, "hdfs:dir:")) +} + +action_sufficient_for_operation(action, operation) if { + action_hierarchy[action][_] == action_for_operation[operation] +} + +action_hierarchy := { + "full": ["full", "rw","ro"], + "rw": ["rw", "ro"], + "ro": ["ro"], +} + +action_for_operation := { + "getfileinfo": "ro", + "listStatus": "ro", + "delete": "full", +} + +groups := {"admin": ["admins"], "alice": ["developers"], "bob": []} + +acls := [ + { + "identity": "group:admins", + "action": "full", + "resource": "hdfs:dir:/", + }, + { + "identity": "group:developers", + "action": "full", + "resource": "hdfs:dir:/developers/", + }, + { + "identity": "user:alice", + "action": "full", + "resource": "hdfs:dir:/alice/", + }, + { + "identity": "user:bob", + "action": "full", + "resource": "hdfs:dir:/bob/", + }, + { + "identity": "user:bob", + "action": "ro", + "resource": "hdfs:dir:/developers/", + }, + { + "identity": "user:bob", + "action": "rw", + "resource": "hdfs:file:/developers/file-from-bob", + }, +] diff --git a/rego/hdfs_test.rego b/rego/hdfs_test.rego new file mode 100644 index 0000000..3d694f5 --- /dev/null +++ b/rego/hdfs_test.rego @@ -0,0 +1,130 @@ +package hdfs + +import rego.v1 + +test_admin_access_to_slash if { + allow with input as { + "callerUgi": { + "shortUserName": "admin" + }, + "path": "/top-level", + "operationName": "delete", + } +} + +test_admin_access_to_alice if { + allow with input as { + "callerUgi": { + "shortUserName": "admin" + }, + "path": "/alice/file", + "operationName": "delete", + } +} + + +test_admin_access_to_alice_nested_file if { + allow with input as { + "callerUgi": { + "shortUserName": "admin" + }, + "path": "/alice/nested/file", + "operationName": "delete", + } +} + +test_admin_access_to_developers if { + allow with input as { + "callerUgi": { + "shortUserName": "admin" + }, + "path": "/developers/file", + "operationName": "getfileinfo", + } +} + + + +test_alice_access_to_alice if { + allow with input as { + "callerUgi": { + "shortUserName": "alice" + }, + "path": "/alice/file", + "operationName": "delete", + } +} + +test_alice_no_access_to_bob if { + not allow with input as { + "callerUgi": { + "shortUserName": "alice" + }, + "path": "/bob/file", + "operationName": "delete", + } +} + +test_alice_access_to_developers if { + allow with input as { + "callerUgi": { + "shortUserName": "alice" + }, + "path": "/developers/file", + "operationName": "delete", + } +} + + + + + +test_bob_no_access_to_alice if { + not allow with input as { + "callerUgi": { + "shortUserName": "bob" + }, + "path": 
"/alice/file", + "operationName": "delete", + } +} + +test_bob_access_to_bob if { + allow with input as { + "callerUgi": { + "shortUserName": "bob" + }, + "path": "/bob/file", + "operationName": "delete", + } +} + +test_bob_ro_access_to_developers if { + allow with input as { + "callerUgi": { + "shortUserName": "bob" + }, + "path": "/developers/file", + "operationName": "getfileinfo", + } +} + +test_bob_no_rw_access_to_developers if { + not allow with input as { + "callerUgi": { + "shortUserName": "bob" + }, + "path": "/developers/file", + "operationName": "delete", + } +} + +test_bob_rw_access_to_developers_special_file if { + allow with input as { + "callerUgi": { + "shortUserName": "bob" + }, + "path": "/developers/file-from-bob", + "operationName": "listStatus", # FIXME: Change to operation that needs rw action + } +} From bd76d0ed4e5e810a23ef08ef010d7a4e66838ef1 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Tue, 6 Feb 2024 10:52:01 +0100 Subject: [PATCH 04/22] rego: Add new operations --- rego/hdfs.rego | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rego/hdfs.rego b/rego/hdfs.rego index 107666e..c3972fd 100644 --- a/rego/hdfs.rego +++ b/rego/hdfs.rego @@ -49,10 +49,12 @@ action_hierarchy := { action_for_operation := { "getfileinfo": "ro", "listStatus": "ro", + "mkdirs": "full", # TODO check if this is ok "delete": "full", + "rename": "full", # FIXME: Should check source *and* target } -groups := {"admin": ["admins"], "alice": ["developers"], "bob": []} +groups := {"admin": ["admins"], "alice": ["developers"], "bob": [], "HTTP": ["admins"]} acls := [ { From d5d99418b1022580a6a89123144f675fe5f5530b Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Tue, 6 Feb 2024 10:52:32 +0100 Subject: [PATCH 05/22] reduce log level --- src/main/java/tech/stackable/hadoop/StackableGroupMapper.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/tech/stackable/hadoop/StackableGroupMapper.java b/src/main/java/tech/stackable/hadoop/StackableGroupMapper.java index ba53b63..6a3caf0 100644 --- a/src/main/java/tech/stackable/hadoop/StackableGroupMapper.java +++ b/src/main/java/tech/stackable/hadoop/StackableGroupMapper.java @@ -53,8 +53,8 @@ public StackableGroupMapper() { throw new RuntimeException("Config \"" + OPA_MAPPING_GROUP_NAME_PROP + "\" missing"); } - LOG.info("OPA mapping URL [{}]", opaMappingUrl); - LOG.info("OPA mapping group [{}]", mappingGroupName); + LOG.debug("OPA mapping URL [{}]", opaMappingUrl); + LOG.debug("OPA mapping group [{}]", mappingGroupName); this.json = new ObjectMapper() From 968bc019c742266b592b4047fcc8bd152b2b9ae9 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Tue, 6 Feb 2024 10:53:12 +0100 Subject: [PATCH 06/22] update test regos --- test/stack/11-rego-rules.yaml | 45 ++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/test/stack/11-rego-rules.yaml b/test/stack/11-rego-rules.yaml index 9ce98ec..a60c08a 100644 --- a/test/stack/11-rego-rules.yaml +++ b/test/stack/11-rego-rules.yaml @@ -49,7 +49,6 @@ data: action_hierarchy[action][_] == action_for_operation[operation] } - action_hierarchy := { "full": ["full", "rw","ro"], "rw": ["rw", "ro"], @@ -59,37 +58,45 @@ data: action_for_operation := { "getfileinfo": "ro", "listStatus": "ro", + "mkdirs": "full", # TODO check if this is ok "delete": "full", + "rename": "full", # FIXME: Should check source *and* target } - groups := {"HTTP": ["http-group"]} + groups := {"admin": ["admins"], "alice": 
["developers"], "bob": [], "HTTP": ["admins"]} acls := [ - { - "identity": "user:HTTP", - "action": "full", - "resource": "hdfs:file:/hosts", - }, { - "identity": "group:http-group", + "identity": "group:admins", "action": "full", "resource": "hdfs:dir:/", }, { - "identity": "user:HTTP", + "identity": "group:developers", + "action": "full", + "resource": "hdfs:dir:/developers/", + }, + { + "identity": "user:alice", + "action": "full", + "resource": "hdfs:dir:/alice/", + }, + { + "identity": "user:bob", "action": "full", - "resource": "hdfs:dir:/ro/full/", + "resource": "hdfs:dir:/bob/", + }, + { + "identity": "user:bob", + "action": "ro", + "resource": "hdfs:dir:/developers/", + }, + { + "identity": "user:bob", + "action": "rw", + "resource": "hdfs:file:/developers/file-from-bob", }, ] - - # hdfs:file:/hosts - # hdfs:file:/hosts/andrew_* - # hdfs:dir:/hosts/ - # kafka:topic:events - # trino:table:tpch.sf1.customers - # trino:schema:tpch.sf1 - # trino:catalog:tpch - hdfs-groups.rego: | package hdfs_groups From b502536433536faec6d0c7809eb8e2ca5da2d7e5 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Wed, 7 Feb 2024 11:03:55 +0100 Subject: [PATCH 07/22] update rego rules --- rego/hdfs.rego | 32 ++++++++++++++++++++++---------- rego/hdfs_test.rego | 10 ++++++++++ test/stack/11-rego-rules.yaml | 32 ++++++++++++++++++++++---------- 3 files changed, 54 insertions(+), 20 deletions(-) diff --git a/rego/hdfs.rego b/rego/hdfs.rego index c3972fd..aaf565e 100644 --- a/rego/hdfs.rego +++ b/rego/hdfs.rego @@ -28,10 +28,15 @@ matches_resource(file, resource) if { resource == concat("", ["hdfs:file:", file]) } +# Resource mentions the directory explicitly +matches_resource(file, resource) if { + trim_suffix(resource, "/") == concat("", ["hdfs:dir:", file]) +} + # Resource mentions a folder higher up the tree, which will will grant access recursively matches_resource(file, resource) if { startswith(resource, "hdfs:dir:/") - # dirs need to have a trailing slash + # directories need to have a trailing slash endswith(resource, "/") startswith(file, trim_prefix(resource, "hdfs:dir:")) } @@ -49,32 +54,39 @@ action_hierarchy := { action_for_operation := { "getfileinfo": "ro", "listStatus": "ro", - "mkdirs": "full", # TODO check if this is ok - "delete": "full", - "rename": "full", # FIXME: Should check source *and* target + "open": "ro", + "mkdirs": "rw", # TODO check if this is ok + "delete": "rw", + "rename": "rw", # The "rename" operation will be actually called on both - the source and the target location. 
+ # Because of this you need to have rw permissions on the source and target file - which is desired } -groups := {"admin": ["admins"], "alice": ["developers"], "bob": [], "HTTP": ["admins"]} +groups := {"admin": ["admins"], "alice": ["developers"], "bob": [], "HTTP": ["developers"]} acls := [ { "identity": "group:admins", - "action": "full", + "action": "rw", "resource": "hdfs:dir:/", }, { "identity": "group:developers", - "action": "full", + "action": "rw", "resource": "hdfs:dir:/developers/", }, + { + "identity": "group:developers", + "action": "ro", + "resource": "hdfs:dir:/developers-ro/", + }, { "identity": "user:alice", - "action": "full", + "action": "rw", "resource": "hdfs:dir:/alice/", }, { "identity": "user:bob", - "action": "full", + "action": "rw", "resource": "hdfs:dir:/bob/", }, { @@ -82,7 +94,7 @@ acls := [ "action": "ro", "resource": "hdfs:dir:/developers/", }, - { + { "identity": "user:bob", "action": "rw", "resource": "hdfs:file:/developers/file-from-bob", diff --git a/rego/hdfs_test.rego b/rego/hdfs_test.rego index 3d694f5..c55af92 100644 --- a/rego/hdfs_test.rego +++ b/rego/hdfs_test.rego @@ -45,6 +45,16 @@ test_admin_access_to_developers if { +test_alice_access_to_alice_folder if { + allow with input as { + "callerUgi": { + "shortUserName": "alice" + }, + "path": "/alice", + "operationName": "getfileinfo", + } +} + test_alice_access_to_alice if { allow with input as { "callerUgi": { diff --git a/test/stack/11-rego-rules.yaml b/test/stack/11-rego-rules.yaml index a60c08a..a44c7d3 100644 --- a/test/stack/11-rego-rules.yaml +++ b/test/stack/11-rego-rules.yaml @@ -37,10 +37,15 @@ data: resource == concat("", ["hdfs:file:", file]) } + # Resource mentions the directory explicitly + matches_resource(file, resource) if { + trim_suffix(resource, "/") == concat("", ["hdfs:dir:", file]) + } + # Resource mentions a folder higher up the tree, which will will grant access recursively matches_resource(file, resource) if { startswith(resource, "hdfs:dir:/") - # dirs need to have a trailing slash + # directories need to have a trailing slash endswith(resource, "/") startswith(file, trim_prefix(resource, "hdfs:dir:")) } @@ -58,32 +63,39 @@ data: action_for_operation := { "getfileinfo": "ro", "listStatus": "ro", - "mkdirs": "full", # TODO check if this is ok - "delete": "full", - "rename": "full", # FIXME: Should check source *and* target + "open": "ro", + "mkdirs": "rw", # TODO check if this is ok + "delete": "rw", + "rename": "rw", # The "rename" operation will be actually called on both - the source and the target location. 
+ # Because of this you need to have rw permissions on the source and target file - which is desired } - groups := {"admin": ["admins"], "alice": ["developers"], "bob": [], "HTTP": ["admins"]} + groups := {"admin": ["admins"], "alice": ["developers"], "bob": [], "HTTP": ["developers"]} acls := [ { "identity": "group:admins", - "action": "full", + "action": "rw", "resource": "hdfs:dir:/", }, { "identity": "group:developers", - "action": "full", + "action": "rw", "resource": "hdfs:dir:/developers/", }, + { + "identity": "group:developers", + "action": "ro", + "resource": "hdfs:dir:/developers-ro/", + }, { "identity": "user:alice", - "action": "full", + "action": "rw", "resource": "hdfs:dir:/alice/", }, { "identity": "user:bob", - "action": "full", + "action": "rw", "resource": "hdfs:dir:/bob/", }, { @@ -91,7 +103,7 @@ data: "action": "ro", "resource": "hdfs:dir:/developers/", }, - { + { "identity": "user:bob", "action": "rw", "resource": "hdfs:file:/developers/file-from-bob", From 8b334f8127939e9c34ef67463b4861baddc8df45 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 8 Feb 2024 16:31:19 +0100 Subject: [PATCH 08/22] Let old APi call new API --- rego/hdfs.rego | 10 ++++-- .../StackableAccessControlEnforcer.java | 36 +++++++++++++++---- test/stack/11-rego-rules.yaml | 10 ++++-- 3 files changed, 44 insertions(+), 12 deletions(-) diff --git a/rego/hdfs.rego b/rego/hdfs.rego index aaf565e..4fde9ad 100644 --- a/rego/hdfs.rego +++ b/rego/hdfs.rego @@ -57,11 +57,15 @@ action_for_operation := { "open": "ro", "mkdirs": "rw", # TODO check if this is ok "delete": "rw", - "rename": "rw", # The "rename" operation will be actually called on both - the source and the target location. - # Because of this you need to have rw permissions on the source and target file - which is desired + # The "rename" operation will be actually called on both - the source and the target location. + # Because of this you need to have rw permissions on the source and target file - which is desired + "rename": "rw", + # The hdfs codebase still does some calls to the deprecated API, which is missing the operationName, so we just + # assume the worst and require "rw" access. + "deprecatedCheckPermissionApi": "rw", } -groups := {"admin": ["admins"], "alice": ["developers"], "bob": [], "HTTP": ["developers"]} +groups := {"admin": ["admins"], "alice": ["developers"], "bob": []} acls := [ { diff --git a/src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java b/src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java index 866d3ef..f5a64f2 100644 --- a/src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java +++ b/src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java @@ -14,12 +14,12 @@ import org.apache.hadoop.hdfs.server.namenode.INode; import org.apache.hadoop.hdfs.server.namenode.INodeAttributeProvider; import org.apache.hadoop.hdfs.server.namenode.INodeAttributes; +import org.apache.hadoop.ipc.CallerContext; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; import java.net.URI; import java.net.http.HttpClient; import java.net.http.HttpRequest; @@ -87,10 +87,34 @@ public void checkPermission(String fsOwner, String supergroup, // We are using the new "checkPermissionWithContext" API, as indicated by the log statement // "Use the new authorization provider API". 
All the calls to this old function only happen when opType == null, - // in which case we have no idea on what to authorize at, so we just allow it. - // FIXME: Needs testing - - // throw new AccessControlException("The HdfsOpaAccessControlEnforcer does not implement the old checkPermission API."); + // in which case we have no idea on what to authorize at, so we put in the operationName "deprecatedCheckPermissionApi". + // Rego rules need to check for the maximum access level, as this can be potentially any operation. + + INodeAttributeProvider.AuthorizationContext.Builder builder = + new INodeAttributeProvider.AuthorizationContext.Builder(); + builder.fsOwner(fsOwner). + supergroup(supergroup). + callerUgi(ugi). + inodeAttrs(inodeAttrs). + inodes(inodes). + pathByNameArr(pathByNameArr). + snapshotId(snapshotId). + path(path). + ancestorIndex(ancestorIndex). + doCheckOwner(doCheckOwner). + ancestorAccess(ancestorAccess). + parentAccess(parentAccess). + access(access). + subAccess(subAccess). + ignoreEmptyDir(ignoreEmptyDir). + operationName("deprecatedCheckPermissionApi"). + callerContext(CallerContext.getCurrent()); + this.checkPermissionWithContext(builder.build()); + +// throw new AccessControlException("The HdfsOpaAccessControlEnforcer does not implement the old checkPermission API. Passed arguments: " +// + "fsOwner: " + fsOwner + ", supergroup: " + supergroup + ", ugi: " + ugi + ", path: " + path + ", ancestorIndex:" + ancestorIndex +// + ", doCheckOwner: " + doCheckOwner + ", ancestorAccess: " + ancestorAccess + ", parentAccess: " + parentAccess +// + ", subAccess: " + subAccess + ", ignoreEmptyDir: " + ignoreEmptyDir); } @Override @@ -104,7 +128,7 @@ public void checkPermissionWithContext(INodeAttributeProvider.AuthorizationConte throw new OpaException.SerializeFailed(e); } - LOG.debug("Request body [{}]", body); + LOG.info("Request body [{}]", body); HttpResponse response = null; try { response = diff --git a/test/stack/11-rego-rules.yaml b/test/stack/11-rego-rules.yaml index a44c7d3..3b08832 100644 --- a/test/stack/11-rego-rules.yaml +++ b/test/stack/11-rego-rules.yaml @@ -66,11 +66,15 @@ data: "open": "ro", "mkdirs": "rw", # TODO check if this is ok "delete": "rw", - "rename": "rw", # The "rename" operation will be actually called on both - the source and the target location. - # Because of this you need to have rw permissions on the source and target file - which is desired + # The "rename" operation will be actually called on both - the source and the target location. + # Because of this you need to have rw permissions on the source and target file - which is desired + "rename": "rw", + # The hdfs codebase still does some calls to the deprecated API, which is missing the operationName, so we just + # assume the worst and require "rw" access. 
+ "deprecatedCheckPermissionApi": "rw", } - groups := {"admin": ["admins"], "alice": ["developers"], "bob": [], "HTTP": ["developers"]} + groups := {"admin": ["admins"], "alice": ["developers"], "bob": []} acls := [ { From 535391b2c1d12c7588207e95b4b8464bb7412db1 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Fri, 9 Feb 2024 11:20:02 +0100 Subject: [PATCH 09/22] Avoid using old API at all costs (needs patched HDFS) --- rego/hdfs.rego | 4 +- .../StackableAccessControlEnforcer.java | 55 ++++++++----------- test/stack/11-rego-rules.yaml | 4 +- 3 files changed, 24 insertions(+), 39 deletions(-) diff --git a/rego/hdfs.rego b/rego/hdfs.rego index 4fde9ad..c77182f 100644 --- a/rego/hdfs.rego +++ b/rego/hdfs.rego @@ -56,13 +56,11 @@ action_for_operation := { "listStatus": "ro", "open": "ro", "mkdirs": "rw", # TODO check if this is ok + "create": "rw", "delete": "rw", # The "rename" operation will be actually called on both - the source and the target location. # Because of this you need to have rw permissions on the source and target file - which is desired "rename": "rw", - # The hdfs codebase still does some calls to the deprecated API, which is missing the operationName, so we just - # assume the worst and require "rw" access. - "deprecatedCheckPermissionApi": "rw", } groups := {"admin": ["admins"], "alice": ["developers"], "bob": []} diff --git a/src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java b/src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java index f5a64f2..b890568 100644 --- a/src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java +++ b/src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java @@ -14,7 +14,6 @@ import org.apache.hadoop.hdfs.server.namenode.INode; import org.apache.hadoop.hdfs.server.namenode.INodeAttributeProvider; import org.apache.hadoop.hdfs.server.namenode.INodeAttributes; -import org.apache.hadoop.ipc.CallerContext; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; import org.slf4j.Logger; @@ -26,6 +25,17 @@ import java.net.http.HttpResponse; import java.util.Objects; +// As of 2024-02-09 INodeAttributeProvider.AccessControlEnforcer has two functions: The old - deprecated - +// checkPermission and the new checkPermissionWithContext. HDFS uses reflection to check if the authorizer +// supports the new API (which we do) and uses that in this case. This is also indicated by the log statement +// "Use the new authorization provider API" during startup, see https://github.com/apache/hadoop/blob/50d256ef3c2531563bc6ba96dec6b78e154b4697/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java#L245 +// FSPermissionChecker has a ThreadLocal operationType, which needs to be set to e.g. "create", "delete" or +// "rename" prior to calling the FSPermissionChecker.checkPermission function, as it will actually check if +// operationType is null and will still use the old API in this case! But the old API does not have the +// information about the operationType, which makes it hard to impossible to authorize the request. As a +// consequence we only support the new API and will make sure no HDFS code path calls the old API. This required +// minor patches to HDFS, as it was e.g. missing a call to FSPermissionChecker.setOperationType("create") in +// FSNamesystem.startFileInt (this claim needs to be validated though). 
public class StackableAccessControlEnforcer implements INodeAttributeProvider.AccessControlEnforcer { private static final Logger LOG = LoggerFactory.getLogger(StackableAccessControlEnforcer.class); @@ -83,38 +93,17 @@ public void checkPermission(String fsOwner, String supergroup, int ancestorIndex, boolean doCheckOwner, FsAction ancestorAccess, FsAction parentAccess, FsAction access, FsAction subAccess, boolean ignoreEmptyDir) throws AccessControlException { - LOG.info("checkPermission called"); - - // We are using the new "checkPermissionWithContext" API, as indicated by the log statement - // "Use the new authorization provider API". All the calls to this old function only happen when opType == null, - // in which case we have no idea on what to authorize at, so we put in the operationName "deprecatedCheckPermissionApi". - // Rego rules need to check for the maximum access level, as this can be potentially any operation. - - INodeAttributeProvider.AuthorizationContext.Builder builder = - new INodeAttributeProvider.AuthorizationContext.Builder(); - builder.fsOwner(fsOwner). - supergroup(supergroup). - callerUgi(ugi). - inodeAttrs(inodeAttrs). - inodes(inodes). - pathByNameArr(pathByNameArr). - snapshotId(snapshotId). - path(path). - ancestorIndex(ancestorIndex). - doCheckOwner(doCheckOwner). - ancestorAccess(ancestorAccess). - parentAccess(parentAccess). - access(access). - subAccess(subAccess). - ignoreEmptyDir(ignoreEmptyDir). - operationName("deprecatedCheckPermissionApi"). - callerContext(CallerContext.getCurrent()); - this.checkPermissionWithContext(builder.build()); - -// throw new AccessControlException("The HdfsOpaAccessControlEnforcer does not implement the old checkPermission API. Passed arguments: " -// + "fsOwner: " + fsOwner + ", supergroup: " + supergroup + ", ugi: " + ugi + ", path: " + path + ", ancestorIndex:" + ancestorIndex -// + ", doCheckOwner: " + doCheckOwner + ", ancestorAccess: " + ancestorAccess + ", parentAccess: " + parentAccess -// + ", subAccess: " + subAccess + ", ignoreEmptyDir: " + ignoreEmptyDir); + LOG.warn("checkPermission called"); + + new Throwable().printStackTrace(); + throw new AccessControlException("The HdfsOpaAccessControlEnforcer does not implement the old checkPermission API. " + + "This should not happen, as all HDFS code paths should call the new API. " + + "I dumped the stack trace for you (check active namenode logs), so you can figure out which code path it was. " + + "Please report all of that to author of the OPA authorizer (We don't have a stable GitHub link yet, sorry!) " + + "Passed arguments: " + + "fsOwner: " + fsOwner + ", supergroup: " + supergroup + ", ugi: " + ugi + ", path: " + path + ", ancestorIndex:" + ancestorIndex + + ", doCheckOwner: " + doCheckOwner + ", ancestorAccess: " + ancestorAccess + ", parentAccess: " + parentAccess + + ", subAccess: " + subAccess + ", ignoreEmptyDir: " + ignoreEmptyDir); } @Override diff --git a/test/stack/11-rego-rules.yaml b/test/stack/11-rego-rules.yaml index 3b08832..91a77e7 100644 --- a/test/stack/11-rego-rules.yaml +++ b/test/stack/11-rego-rules.yaml @@ -65,13 +65,11 @@ data: "listStatus": "ro", "open": "ro", "mkdirs": "rw", # TODO check if this is ok + "create": "rw", "delete": "rw", # The "rename" operation will be actually called on both - the source and the target location. 
# Because of this you need to have rw permissions on the source and target file - which is desired "rename": "rw", - # The hdfs codebase still does some calls to the deprecated API, which is missing the operationName, so we just - # assume the worst and require "rw" access. - "deprecatedCheckPermissionApi": "rw", } groups := {"admin": ["admins"], "alice": ["developers"], "bob": []} From 236a0fbe1bf5386c08be0c6b67765ccbb8a9db2e Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Fri, 9 Feb 2024 11:52:03 +0100 Subject: [PATCH 10/22] Union opa url settings into hadoop.security.group.mapping.opa.policy.url --- README.md | 8 +-- rego/hdfs.rego | 22 +++++-- .../hadoop/StackableGroupMapper.java | 27 ++++---- test/stack/11-rego-rules.yaml | 65 +++++-------------- test/stack/20-hdfs.yaml | 3 +- 5 files changed, 50 insertions(+), 75 deletions(-) diff --git a/README.md b/README.md index 6075aa4..8ac3a8f 100644 --- a/README.md +++ b/README.md @@ -62,13 +62,9 @@ Group mappings are resolved on the NameNode and the following configuration shou * Fixed value of `"tech.stackable.hadoop.StackableGroupMapper"` -#### hadoop.security.group.mapping.opa.url +#### hadoop.security.group.mapping.opa.policy.url -* The Opa Server endpoint e.g. `"http://opa.default.svc.cluster.local:8081/v1/data/hdfsgroups"` - -#### hadoop.security.group.mapping.opa.list.name - -* Opa responses have a [root field](https://www.openpolicyagent.org/docs/latest/rest-api/#response-message) called `result`: the result itself - in this case of a list of user groups - is a top-level field within the root field and is configurable i.e. the group mapper just needs to look up this field from the response and this is passed in the configuration. This means that both the output format of the rego rule and the corresponding response field are configured independently of the group mapper. +* The Opa Server endpoint e.g. `"http://opa.default.svc.cluster.local:8081/v1/data/hdfs/groups"` #### hadoop.user.group.static.mapping.overrides diff --git a/rego/hdfs.rego b/rego/hdfs.rego index c77182f..2c13e43 100644 --- a/rego/hdfs.rego +++ b/rego/hdfs.rego @@ -4,6 +4,7 @@ import rego.v1 default allow = false +# HDFS authorizer allow if { some acl in acls matches_identity(input.callerUgi.shortUserName, acl.identity) @@ -11,6 +12,20 @@ allow if { action_sufficient_for_operation(acl.action, input.operationName) } +# HDFS group mapper +# This will return the group data in this form: +# "result": { +# "groups": [ +# "admin", +# "developers" +# ] +# ... +groups := {group | + raw = groups_for_user[input.username][_] + # Keycloak groups have trailing slashes + group := trim_prefix(raw, "/") +} + # Identity mentions the user explicitly matches_identity(user, identity) if { identity == concat("", ["user:", user]) @@ -18,11 +33,10 @@ matches_identity(user, identity) if { # Identity mentions group the user is part of matches_identity(user, identity) if { - some group in groups[user] + some group in groups_for_user[user] identity == concat("", ["group:", group]) } - # Resource mentions the file explicitly matches_resource(file, resource) if { resource == concat("", ["hdfs:file:", file]) @@ -55,7 +69,7 @@ action_for_operation := { "getfileinfo": "ro", "listStatus": "ro", "open": "ro", - "mkdirs": "rw", # TODO check if this is ok + "mkdirs": "rw", "create": "rw", "delete": "rw", # The "rename" operation will be actually called on both - the source and the target location. 
@@ -63,7 +77,7 @@ action_for_operation := { "rename": "rw", } -groups := {"admin": ["admins"], "alice": ["developers"], "bob": []} +groups_for_user := {"admin": ["admins"], "alice": ["developers"], "bob": []} acls := [ { diff --git a/src/main/java/tech/stackable/hadoop/StackableGroupMapper.java b/src/main/java/tech/stackable/hadoop/StackableGroupMapper.java index 6a3caf0..ca0b916 100644 --- a/src/main/java/tech/stackable/hadoop/StackableGroupMapper.java +++ b/src/main/java/tech/stackable/hadoop/StackableGroupMapper.java @@ -4,7 +4,6 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; -import java.io.IOException; import java.net.URI; import java.net.http.HttpClient; import java.net.http.HttpRequest; @@ -13,6 +12,8 @@ import java.util.List; import java.util.Map; import java.util.Objects; + +import com.fasterxml.jackson.databind.type.TypeFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.GroupMappingServiceProvider; import org.slf4j.Logger; @@ -22,12 +23,9 @@ public class StackableGroupMapper implements GroupMappingServiceProvider { private static final Logger LOG = LoggerFactory.getLogger(StackableGroupMapper.class); - public static final String OPA_MAPPING_URL_PROP = "hadoop.security.group.mapping.opa.url"; - private static final String OPA_MAPPING_GROUP_NAME_PROP = - "hadoop.security.group.mapping.opa.list.name"; + public static final String OPA_MAPPING_URL_PROP = "hadoop.security.group.mapping.opa.policy.url"; // response base field: see https://www.openpolicyagent.org/docs/latest/rest-api/#response-message private static final String OPA_RESULT_FIELD = "result"; - private final String mappingGroupName; private final HttpClient httpClient = HttpClient.newHttpClient(); private final ObjectMapper json; @@ -48,13 +46,7 @@ public StackableGroupMapper() { throw new OpaException.UriInvalid(opaUri, e); } - this.mappingGroupName = configuration.get(OPA_MAPPING_GROUP_NAME_PROP); - if (mappingGroupName == null) { - throw new RuntimeException("Config \"" + OPA_MAPPING_GROUP_NAME_PROP + "\" missing"); - } - LOG.debug("OPA mapping URL [{}]", opaMappingUrl); - LOG.debug("OPA mapping group [{}]", mappingGroupName); this.json = new ObjectMapper() @@ -67,6 +59,10 @@ public StackableGroupMapper() { .setSerializationInclusion(JsonInclude.Include.NON_NULL); } + private static class OpaQueryResult { + public List result; + } + /** * Returns list of groups for a user. 
* @@ -111,14 +107,13 @@ public List getGroups(String user) { throw new OpaException.OpaServerError(query.toString(), response); } - List groups; + OpaQueryResult result; try { - @SuppressWarnings("unchecked") - Map result = (Map) json.readValue(response.body(), HashMap.class).get(OPA_RESULT_FIELD); - groups = (List) result.get(this.mappingGroupName); - } catch (Exception e) { + result = json.readValue(response.body(), OpaQueryResult.class); + } catch (JsonProcessingException e) { throw new OpaException.DeserializeFailed(e); } + List groups = result.result; LOG.debug("Groups for [{}]: [{}]", user, groups); diff --git a/test/stack/11-rego-rules.yaml b/test/stack/11-rego-rules.yaml index 91a77e7..5e6c30b 100644 --- a/test/stack/11-rego-rules.yaml +++ b/test/stack/11-rego-rules.yaml @@ -13,6 +13,7 @@ data: default allow = false + # HDFS authorizer allow if { some acl in acls matches_identity(input.callerUgi.shortUserName, acl.identity) @@ -20,6 +21,20 @@ data: action_sufficient_for_operation(acl.action, input.operationName) } + # HDFS group mapper + # This will return the group data in this form: + # "result": { + # "groups": [ + # "admin", + # "developers" + # ] + # ... + groups := {group | + raw = groups_for_user[input.username][_] + # Keycloak groups have trailing slashes + group := trim_prefix(raw, "/") + } + # Identity mentions the user explicitly matches_identity(user, identity) if { identity == concat("", ["user:", user]) @@ -27,11 +42,10 @@ data: # Identity mentions group the user is part of matches_identity(user, identity) if { - some group in groups[user] + some group in groups_for_user[user] identity == concat("", ["group:", group]) } - # Resource mentions the file explicitly matches_resource(file, resource) if { resource == concat("", ["hdfs:file:", file]) @@ -64,7 +78,7 @@ data: "getfileinfo": "ro", "listStatus": "ro", "open": "ro", - "mkdirs": "rw", # TODO check if this is ok + "mkdirs": "rw", "create": "rw", "delete": "rw", # The "rename" operation will be actually called on both - the source and the target location. @@ -72,7 +86,7 @@ data: "rename": "rw", } - groups := {"admin": ["admins"], "alice": ["developers"], "bob": []} + groups_for_user := {"admin": ["admins"], "alice": ["developers"], "bob": []} acls := [ { @@ -111,46 +125,3 @@ data: "resource": "hdfs:file:/developers/file-from-bob", }, ] - hdfs-groups.rego: | - package hdfs_groups - - # this will return the group data in this form: - # "result": { - # "groups": [ - # "admin", - # "superuser" - # ] - # ... 
- groups := {g | - raw = users_by_name[input.username].groups[_] - g := trim(raw, "/") - } - - # returning data in the form presented by the UIF - users_by_name := { - "alice": { - "id": "af07f12c-1234-40a7-93e0-874537bdf3f5", - "username": "alice", - "groups": ["/superset-admin"], - "customAttributes": {}, - }, - "bob": { - "id": "af07f12c-2345-40a7-93e0-874537bdf3f5", - "username": "bob", - "groups": ["/admin"], - "customAttributes": {}, - }, - "stackable": { - "id": "af07f12c-3456-40a7-93e0-874537bdf3f5", - "username": "stackable", - "groups": ["/admin", "/superuser"], - "customAttributes": {}, - }, - # Hadoop will use the short-name for group mappings - "nn": { - "id": "af07f12c-7890-40a7-93e0-874537bdf3f5", - "username": "nn", - "groups": ["/admin", "/superuser"], - "customAttributes": {}, - }, - } diff --git a/test/stack/20-hdfs.yaml b/test/stack/20-hdfs.yaml index df1fee6..fae8fb8 100644 --- a/test/stack/20-hdfs.yaml +++ b/test/stack/20-hdfs.yaml @@ -44,8 +44,7 @@ spec: core-site.xml: # The mapper is only handled on the namenode so no need to apply this config to all roles hadoop.security.group.mapping: tech.stackable.hadoop.StackableGroupMapper - hadoop.security.group.mapping.opa.url: http://opa.default.svc.cluster.local:8081/v1/data/hdfs_groups - hadoop.security.group.mapping.opa.list.name: groups + hadoop.security.group.mapping.opa.policy.url: http://opa.default.svc.cluster.local:8081/v1/data/hdfs/groups hadoop.security.authorization.opa.policy.url: http://opa.default.svc.cluster.local:8081/v1/data/hdfs/allow # The operator adds a default static mapping when kerberos is activated, see: # https://github.com/stackabletech/hdfs-operator/blob/main/rust/operator-binary/src/kerberos.rs#L97-L101 From 5325ee6169645ea671ab70385a1e777fe57bc70d Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Fri, 9 Feb 2024 11:52:23 +0100 Subject: [PATCH 11/22] add test job --- test/stack/30-test-hdfs-permissions.yaml | 89 ++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 test/stack/30-test-hdfs-permissions.yaml diff --git a/test/stack/30-test-hdfs-permissions.yaml b/test/stack/30-test-hdfs-permissions.yaml new file mode 100644 index 0000000..951ab46 --- /dev/null +++ b/test/stack/30-test-hdfs-permissions.yaml @@ -0,0 +1,89 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: test-hdfs-permissions +spec: + template: + spec: + containers: + - name: test-hdfs-permissions + image: docker.stackable.tech/stackable/hadoop:3.3.6-stackable0.0.0-dev + env: + - name: HADOOP_CONF_DIR + value: /stackable/conf/hdfs + - name: KRB5_CONFIG + value: /stackable/kerberos/krb5.conf + - name: HADOOP_OPTS + value: -Djava.security.krb5.conf=/stackable/kerberos/krb5.conf + command: + - /bin/bash + - -c + - | + set -ex + klist -k /stackable/kerberos/keytab + + log_in () { kdestroy; kinit -kt /stackable/kerberos/keytab $1/test-hdfs-permissions.default.svc.cluster.local; } + + log_in admin + bin/hdfs dfs -ls / + bin/hdfs dfs -mkdir -p /alice + bin/hdfs dfs -mkdir -p /bob + bin/hdfs dfs -mkdir -p /developers + bin/hdfs dfs -mkdir -p /developers-ro + bin/hdfs dfs -ls -R / + + log_in alice + bin/hdfs dfs -ls / && exit 1 + bin/hdfs dfs -ls /alice + bin/hdfs dfs -ls /bob && exit 1 + bin/hdfs dfs -ls /developers + bin/hdfs dfs -ls /developers-ro + + bin/hdfs dfs -put -f /etc/hosts /alice/ + bin/hdfs dfs -put -f /etc/hosts /bob/ && exit 1 + bin/hdfs dfs -put -f /etc/hosts /developers/ + bin/hdfs dfs -put -f /etc/hosts /developers-ro/ && exit 1 + + log_in bob + bin/hdfs dfs -ls / && exit 1 + bin/hdfs dfs 
-ls /alice && exit 1 + bin/hdfs dfs -ls /bob + bin/hdfs dfs -ls /developers + bin/hdfs dfs -ls /developers-ro && exit 1 + + sleep infinity + + bin/hdfs dfs -ls / + bin/hdfs dfs -rm -f /hosts + bin/hdfs dfs -put -f /etc/hosts /hosts + bin/hdfs dfs -ls / + bin/hdfs dfs -cat /hosts + volumeMounts: + - name: hdfs-config + mountPath: /stackable/conf/hdfs + - name: kerberos + mountPath: /stackable/kerberos + volumes: + - name: hdfs-config + configMap: + name: simple-hdfs + - name: kerberos + ephemeral: + volumeClaimTemplate: + metadata: + annotations: + secrets.stackable.tech/class: kerberos-default + secrets.stackable.tech/scope: service=test-hdfs-permissions + secrets.stackable.tech/kerberos.service.names: admin,alice,bob + spec: + storageClassName: secrets.stackable.tech + accessModes: + - ReadWriteOnce + resources: + requests: + storage: "1" + securityContext: + fsGroup: 1000 + runAsGroup: 1000 + runAsUser: 1000 + restartPolicy: OnFailure From 00245de8d5ce441aa918922b0270bbbf69cc9d3a Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 15 Feb 2024 09:10:32 +0100 Subject: [PATCH 12/22] update docs --- .../hadoop/StackableAccessControlEnforcer.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java b/src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java index b890568..3388bda 100644 --- a/src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java +++ b/src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java @@ -29,11 +29,11 @@ // checkPermission and the new checkPermissionWithContext. HDFS uses reflection to check if the authorizer // supports the new API (which we do) and uses that in this case. This is also indicated by the log statement // "Use the new authorization provider API" during startup, see https://github.com/apache/hadoop/blob/50d256ef3c2531563bc6ba96dec6b78e154b4697/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java#L245 -// FSPermissionChecker has a ThreadLocal operationType, which needs to be set to e.g. "create", "delete" or -// "rename" prior to calling the FSPermissionChecker.checkPermission function, as it will actually check if -// operationType is null and will still use the old API in this case! But the old API does not have the -// information about the operationType, which makes it hard to impossible to authorize the request. As a -// consequence we only support the new API and will make sure no HDFS code path calls the old API. This required +// FSPermissionChecker (as a caller of the AccessControlEnforcer interface) has a ThreadLocal operationType, which +// needs to be set to e.g. "create", "delete" or "rename" prior to calling the FSPermissionChecker.checkPermission +// function, as it will actually check if operationType is null and will still use the old API in this case! But the old +// API does not have the information about the operationType, which makes it hard to impossible to authorize the request. +// As a consequence we only support the new API and will make sure no HDFS code path calls the old API. This required // minor patches to HDFS, as it was e.g. missing a call to FSPermissionChecker.setOperationType("create") in // FSNamesystem.startFileInt (this claim needs to be validated though). 
public class StackableAccessControlEnforcer implements INodeAttributeProvider.AccessControlEnforcer { From 381372a6f2326e3835ce84e7ef180ccfc77bd8e9 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 15 Feb 2024 11:38:41 +0100 Subject: [PATCH 13/22] Add OPA authorizer to README --- README.md | 631 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 591 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index 8ac3a8f..ca4d442 100644 --- a/README.md +++ b/README.md @@ -1,76 +1,627 @@ -# Stackable Group Mapper for Apache Hadoop +# Stackable Apache Hadoop utilities [Stackable Data Platform](https://stackable.tech/) | [Platform Docs](https://docs.stackable.tech/) | [Discussions](https://github.com/orgs/stackabletech/discussions) | [Discord](https://discord.gg/7kZ3BNnCAF) -This projects is a plugin for Apache Hadoop, which can look up groups for users in an OpenPolicyAgent (OPA) instance. -It is intended to be used with the [Stackable Data Platform](https://stackable.tech) +This project contains multiple plugins for Apache Hadoop, which are intended to be used with the [Stackable Data Platform](https://stackable.tech) -## Description +1. OpenPolicyAgent (OPA) authorizer: For every action performed in HDFS an OPA instance is asked if the user is allowed to perform the action. +2. GroupMapper: It can look up group memberships for users from an OPA instance. +3. Not (yet?) in this repository is a [TopologyProvider](https://github.com/stackabletech/hdfs-topology-provider/). -HDFS internally uses user groups for group permissions on folders etc. For this reason it is not enough to just have the groups in OPA during authorization, but they actually need to be available to Hadoop. Hadoop offers a few default group providers, such as: +## Installation +Run `mvn package` and put the resulting `target/hdfs-group-mapper-0.1.0-SNAPSHOT.jar` file on your HDFS classpath. +The easiest way to achieve this is to put it in the directory `/stackable/hadoop/share/hadoop/tools/lib/`. +The Stackable HDFS already takes care of this, you don't need to do anything in this case. -* LDAP -* Linux user group (usually provided by SSSD or Centrify or similar tools) +## OPA authorizer -Hadoop exposes an [interface](https://github.com/apache/hadoop/blob/rel/release-3.3.6/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/GroupMappingServiceProvider.java) that users can implement to extend these [group mappings](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/GroupsMapping.html). The Stackable Group Mapper does this to look up user groups from OPA. +> [!IMPORTANT] +> The authorizer only works when used by an HDFS version that includes fixes from https://github.com/apache/hadoop/pull/6553. +> Stackable HDFS versions starting with `3.3.4` already contain this patch. -## OPA Mappings +### Configuration -OPA mappings are returned from the [User-Info-Fetcher](https://docs.stackable.tech/home/nightly/opa/usage-guide/user-info-fetcher#_example_rego_rule) in this form: +- Set `dfs.namenode.inode.attributes.provider.class` in `hdfs-site.xml` to `tech.stackable.hadoop.StackableAuthorizer` +- Set `hadoop.security.authorization.opa.policy.url` in `core-site.xml` to the HTTP endpoint of your OPA rego rule, e.g. 
`http://opa.default.svc.cluster.local:8081/v1/data/hdfs/allow` -```json -{ - "id": "af07f12c-a2db-40a7-93e0-874537bdf3f5", - "username": "alice", - "groups": [ - "/superset-admin" - ], - "customAttributes": {} -} -``` +### API -The Group Mapper only needs the group listing, which can be requested specifically from the Opa server by providing the current user and filtering out the groups with the `json.filter` function, returning a segment that looks like this: +For every action a request similar to the following is sent to OPA: + +
+Example request ```json { - "result": { - "groups": { + "input": { + "fsOwner": "nn", + "supergroup": "supergroup", + "callerUgi": { + "realUser": null, + "userName": "alice/test-hdfs-permissions.default.svc.cluster.local@CLUSTER.LOCAL", + "shortUserName": "alice", + "primaryGroup": "developers", "groups": [ - "admin", - "superuser" - ] + "developers" + ], + "authenticationMethod": "KERBEROS", + "realAuthenticationMethod": "KERBEROS" + }, + "inodeAttrs": [ + { + "parent": null, + "id": 16385, + "modificationTime": 1707990801081, + "accessTime": 0, + "next": null, + "features": [ + { + "spaceConsumed": { + "nameSpace": 7, + "storageSpace": 1338, + "typeSpaces": {} + }, + "spaceAllowed": { + "nameSpace": 9223372036854775807, + "storageSpace": -1, + "typeSpaces": {} + } + }, + { + "diffs": { + "last": null, + "lastSnapshotId": 2147483646 + }, + "snapshotQuota": 0, + "numSnapshots": 0, + "snapshotList": [], + "lastSnapshotId": 2147483646 + } + ], + "storagePolicyID": 0, + "directorySnapshottableFeature": { + "diffs": { + "last": null, + "lastSnapshotId": 2147483646 + }, + "snapshotQuota": 0, + "numSnapshots": 0, + "snapshotList": [], + "lastSnapshotId": 2147483646 + }, + "directoryWithQuotaFeature": { + "spaceConsumed": { + "nameSpace": 7, + "storageSpace": 1338, + "typeSpaces": {} + }, + "spaceAllowed": { + "nameSpace": 9223372036854775807, + "storageSpace": -1, + "typeSpaces": {} + } + }, + "directoryWithSnapshotFeature": { + "diffs": { + "last": null, + "lastSnapshotId": 2147483646 + }, + "snapshotQuota": 0, + "numSnapshots": 0, + "snapshotList": [], + "lastSnapshotId": 2147483646 + }, + "quotaCounts": { + "nameSpace": 9223372036854775807, + "storageSpace": -1, + "typeSpaces": {} + }, + "localStoragePolicyID": 0, + "diffs": { + "last": null, + "lastSnapshotId": 2147483646 + }, + "localNameBytes": "", + "xattrFeature": null, + "groupName": "supergroup", + "fsPermission": { + "stickyBit": false, + "userAction": "ALL", + "groupAction": "READ_EXECUTE", + "otherAction": "READ_EXECUTE", + "masked": null, + "unmasked": null, + "aclBit": false, + "encryptedBit": false, + "erasureCodedBit": false + }, + "aclFeature": null, + "fsPermissionShort": 493, + "permissionLong": 1099511693805, + "userName": "nn", + "localName": "", + "key": "", + "fullPathName": "/", + "pathComponents": [ + "" + ], + "objectString": "INodeDirectory@3ae77112", + "parentString": "parent=null", + "parentReference": null + }, + { + "parent": { + "parent": null, + "id": 16385, + "modificationTime": 1707990801081, + "accessTime": 0, + "next": null, + "features": [ + { + "spaceConsumed": { + "nameSpace": 7, + "storageSpace": 1338, + "typeSpaces": {} + }, + "spaceAllowed": { + "nameSpace": 9223372036854775807, + "storageSpace": -1, + "typeSpaces": {} + } + }, + { + "diffs": { + "last": null, + "lastSnapshotId": 2147483646 + }, + "snapshotQuota": 0, + "numSnapshots": 0, + "snapshotList": [], + "lastSnapshotId": 2147483646 + } + ], + "storagePolicyID": 0, + "directorySnapshottableFeature": { + "diffs": { + "last": null, + "lastSnapshotId": 2147483646 + }, + "snapshotQuota": 0, + "numSnapshots": 0, + "snapshotList": [], + "lastSnapshotId": 2147483646 + }, + "directoryWithQuotaFeature": { + "spaceConsumed": { + "nameSpace": 7, + "storageSpace": 1338, + "typeSpaces": {} + }, + "spaceAllowed": { + "nameSpace": 9223372036854775807, + "storageSpace": -1, + "typeSpaces": {} + } + }, + "directoryWithSnapshotFeature": { + "diffs": { + "last": null, + "lastSnapshotId": 2147483646 + }, + "snapshotQuota": 0, + "numSnapshots": 0, + 
"snapshotList": [], + "lastSnapshotId": 2147483646 + }, + "quotaCounts": { + "nameSpace": 9223372036854775807, + "storageSpace": -1, + "typeSpaces": {} + }, + "localStoragePolicyID": 0, + "diffs": { + "last": null, + "lastSnapshotId": 2147483646 + }, + "localNameBytes": "", + "xattrFeature": null, + "groupName": "supergroup", + "fsPermission": { + "stickyBit": false, + "userAction": "ALL", + "groupAction": "READ_EXECUTE", + "otherAction": "READ_EXECUTE", + "masked": null, + "unmasked": null, + "aclBit": false, + "encryptedBit": false, + "erasureCodedBit": false + }, + "aclFeature": null, + "fsPermissionShort": 493, + "permissionLong": 1099511693805, + "userName": "nn", + "localName": "", + "key": "", + "fullPathName": "/", + "pathComponents": [ + "" + ], + "objectString": "INodeDirectory@3ae77112", + "parentString": "parent=null", + "parentReference": null + }, + "id": 16389, + "modificationTime": 1707990801081, + "accessTime": 0, + "next": null, + "features": [], + "storagePolicyID": 0, + "directorySnapshottableFeature": null, + "directoryWithQuotaFeature": null, + "directoryWithSnapshotFeature": null, + "quotaCounts": { + "nameSpace": -1, + "storageSpace": -1, + "typeSpaces": {} + }, + "localStoragePolicyID": 0, + "diffs": null, + "localNameBytes": "ZGV2ZWxvcGVycy1ybw==", + "xattrFeature": null, + "groupName": "supergroup", + "fsPermission": { + "stickyBit": false, + "userAction": "ALL", + "groupAction": "READ_EXECUTE", + "otherAction": "READ_EXECUTE", + "masked": null, + "unmasked": null, + "aclBit": false, + "encryptedBit": false, + "erasureCodedBit": false + }, + "aclFeature": null, + "fsPermissionShort": 493, + "permissionLong": 2199023321581, + "userName": "admin", + "localName": "developers-ro", + "key": "ZGV2ZWxvcGVycy1ybw==", + "fullPathName": "/developers-ro", + "pathComponents": [ + "", + "ZGV2ZWxvcGVycy1ybw==" + ], + "objectString": "INodeDirectory@1df11410", + "parentString": "parentDir=/", + "parentReference": null + }, + null + ], + "inodes": [ + { + "parent": null, + "id": 16385, + "modificationTime": 1707990801081, + "accessTime": 0, + "next": null, + "features": [ + { + "spaceConsumed": { + "nameSpace": 7, + "storageSpace": 1338, + "typeSpaces": {} + }, + "spaceAllowed": { + "nameSpace": 9223372036854775807, + "storageSpace": -1, + "typeSpaces": {} + } + }, + { + "diffs": { + "last": null, + "lastSnapshotId": 2147483646 + }, + "snapshotQuota": 0, + "numSnapshots": 0, + "snapshotList": [], + "lastSnapshotId": 2147483646 + } + ], + "storagePolicyID": 0, + "directorySnapshottableFeature": { + "diffs": { + "last": null, + "lastSnapshotId": 2147483646 + }, + "snapshotQuota": 0, + "numSnapshots": 0, + "snapshotList": [], + "lastSnapshotId": 2147483646 + }, + "directoryWithQuotaFeature": { + "spaceConsumed": { + "nameSpace": 7, + "storageSpace": 1338, + "typeSpaces": {} + }, + "spaceAllowed": { + "nameSpace": 9223372036854775807, + "storageSpace": -1, + "typeSpaces": {} + } + }, + "directoryWithSnapshotFeature": { + "diffs": { + "last": null, + "lastSnapshotId": 2147483646 + }, + "snapshotQuota": 0, + "numSnapshots": 0, + "snapshotList": [], + "lastSnapshotId": 2147483646 + }, + "quotaCounts": { + "nameSpace": 9223372036854775807, + "storageSpace": -1, + "typeSpaces": {} + }, + "localStoragePolicyID": 0, + "diffs": { + "last": null, + "lastSnapshotId": 2147483646 + }, + "localNameBytes": "", + "xattrFeature": null, + "groupName": "supergroup", + "fsPermission": { + "stickyBit": false, + "userAction": "ALL", + "groupAction": "READ_EXECUTE", + "otherAction": "READ_EXECUTE", + 
"masked": null, + "unmasked": null, + "aclBit": false, + "encryptedBit": false, + "erasureCodedBit": false + }, + "aclFeature": null, + "fsPermissionShort": 493, + "permissionLong": 1099511693805, + "userName": "nn", + "localName": "", + "key": "", + "fullPathName": "/", + "pathComponents": [ + "" + ], + "objectString": "INodeDirectory@3ae77112", + "parentString": "parent=null", + "parentReference": null + }, + { + "parent": { + "parent": null, + "id": 16385, + "modificationTime": 1707990801081, + "accessTime": 0, + "next": null, + "features": [ + { + "spaceConsumed": { + "nameSpace": 7, + "storageSpace": 1338, + "typeSpaces": {} + }, + "spaceAllowed": { + "nameSpace": 9223372036854775807, + "storageSpace": -1, + "typeSpaces": {} + } + }, + { + "diffs": { + "last": null, + "lastSnapshotId": 2147483646 + }, + "snapshotQuota": 0, + "numSnapshots": 0, + "snapshotList": [], + "lastSnapshotId": 2147483646 + } + ], + "storagePolicyID": 0, + "directorySnapshottableFeature": { + "diffs": { + "last": null, + "lastSnapshotId": 2147483646 + }, + "snapshotQuota": 0, + "numSnapshots": 0, + "snapshotList": [], + "lastSnapshotId": 2147483646 + }, + "directoryWithQuotaFeature": { + "spaceConsumed": { + "nameSpace": 7, + "storageSpace": 1338, + "typeSpaces": {} + }, + "spaceAllowed": { + "nameSpace": 9223372036854775807, + "storageSpace": -1, + "typeSpaces": {} + } + }, + "directoryWithSnapshotFeature": { + "diffs": { + "last": null, + "lastSnapshotId": 2147483646 + }, + "snapshotQuota": 0, + "numSnapshots": 0, + "snapshotList": [], + "lastSnapshotId": 2147483646 + }, + "quotaCounts": { + "nameSpace": 9223372036854775807, + "storageSpace": -1, + "typeSpaces": {} + }, + "localStoragePolicyID": 0, + "diffs": { + "last": null, + "lastSnapshotId": 2147483646 + }, + "localNameBytes": "", + "xattrFeature": null, + "groupName": "supergroup", + "fsPermission": { + "stickyBit": false, + "userAction": "ALL", + "groupAction": "READ_EXECUTE", + "otherAction": "READ_EXECUTE", + "masked": null, + "unmasked": null, + "aclBit": false, + "encryptedBit": false, + "erasureCodedBit": false + }, + "aclFeature": null, + "fsPermissionShort": 493, + "permissionLong": 1099511693805, + "userName": "nn", + "localName": "", + "key": "", + "fullPathName": "/", + "pathComponents": [ + "" + ], + "objectString": "INodeDirectory@3ae77112", + "parentString": "parent=null", + "parentReference": null + }, + "id": 16389, + "modificationTime": 1707990801081, + "accessTime": 0, + "next": null, + "features": [], + "storagePolicyID": 0, + "directorySnapshottableFeature": null, + "directoryWithQuotaFeature": null, + "directoryWithSnapshotFeature": null, + "quotaCounts": { + "nameSpace": -1, + "storageSpace": -1, + "typeSpaces": {} + }, + "localStoragePolicyID": 0, + "diffs": null, + "localNameBytes": "ZGV2ZWxvcGVycy1ybw==", + "xattrFeature": null, + "groupName": "supergroup", + "fsPermission": { + "stickyBit": false, + "userAction": "ALL", + "groupAction": "READ_EXECUTE", + "otherAction": "READ_EXECUTE", + "masked": null, + "unmasked": null, + "aclBit": false, + "encryptedBit": false, + "erasureCodedBit": false + }, + "aclFeature": null, + "fsPermissionShort": 493, + "permissionLong": 2199023321581, + "userName": "admin", + "localName": "developers-ro", + "key": "ZGV2ZWxvcGVycy1ybw==", + "fullPathName": "/developers-ro", + "pathComponents": [ + "", + "ZGV2ZWxvcGVycy1ybw==" + ], + "objectString": "INodeDirectory@1df11410", + "parentString": "parentDir=/", + "parentReference": null + }, + null + ], + "pathByNameArr": [ + "", + 
"ZGV2ZWxvcGVycy1ybw==", + "aG9zdHMuX0NPUFlJTkdf" + ], + "snapshotId": 2147483646, + "path": "/developers-ro/hosts._COPYING_", + "ancestorIndex": 1, + "doCheckOwner": false, + "ancestorAccess": null, + "parentAccess": null, + "access": null, + "subAccess": null, + "ignoreEmptyDir": false, + "operationName": "getfileinfo", + "callerContext": { + "context": "CLI", + "signature": null } } } ``` -The leading slash is required by Opa/Keycloak to allow the definition of subgroups, but this should be removed by the rego rule before returning this list of strings to the mapper. +
-## Configuration +## Group mapper -Group mappings are resolved on the NameNode and the following configuration should be added to the NameNode role: +Despites having the OPA authorizer described above there are a few use-cases to have a group mapper as well. -### envOverrides +1. Correctly showing group information in HDFS, e.g. for file ownership. +2. Only use the group mapper without the OAP authorizer -#### HADOOP_CLASSPATH +Hadoop offers a few default group providers, such as: -* Fixed value of `"/stackable/hadoop/share/hadoop/tools/lib/*.jar"` +* LDAP +* Linux user group (usually provided by SSSD, Centrify or similar tools) -### configOverrides / `core-site.xml` +Hadoop exposes an [interface](https://github.com/apache/hadoop/blob/rel/release-3.3.6/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/GroupMappingServiceProvider.java) that users can implement to extend these [group mappings](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/GroupsMapping.html). The Stackable Group Mapper does this to look up user groups from OPA. + +### Configuration -#### hadoop.security.group.mapping +- Set `hadoop.security.group.mapping` in `core-site.xml` to `tech.stackable.hadoop.StackableGroupMapper` +- Set `hadoop.security.group.mapping.opa.policy.url` in `core-site.xml` to the HTTP endpoint of your OPA rego rule, e.g. `http://opa.default.svc.cluster.local:8081/v1/data/hdfs/groups` +- Make sure to not have set `hadoop.user.group.static.mapping.overrides` in `core-site.xml`, as this clashes with the information the group mapper provides. -* Fixed value of `"tech.stackable.hadoop.StackableGroupMapper"` +### API -#### hadoop.security.group.mapping.opa.policy.url +The group mapper sends the following query to OPA: -* The Opa Server endpoint e.g. `"http://opa.default.svc.cluster.local:8081/v1/data/hdfs/groups"` +```json +{ + "input": { + "username": "alice" + } +} +``` -#### hadoop.user.group.static.mapping.overrides +OPA needs to respond with the list of groups as follows: -* The hdfs-operator will add a default static mapping whenever kerberos is activated. This should be removed so that the mapping implementation can provide this information instead: i.e. with an empty string `""` +```json +{ + "result": { + "groups": { + "groups": [ + "admin", + "developers" + ] + } + } +} +``` -## Testing +### Testing CRDs for spinning up test infrastructure are provided in `test/stack`. The Tiltfile will deploy these resources, build and copy the mapper to the docker image, and re-deploy the image to the running HdfsCluster. 
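A quick way to exercise the group-mapper rego rule documented above, without going through HDFS at all, is to post the same input document straight to OPA's data API. The following is only a sketch: it assumes the query is run from a pod inside the cluster, so that the service DNS name from the configuration section resolves, and it simply prints the raw response for comparison with the format documented above.

```bash
# Illustrative manual check of the group-mapper endpoint (adjust the URL to your environment).
# OPA's data API wraps the policy output in a top-level "result" field.
curl -s \
  -H 'Content-Type: application/json' \
  -d '{"input": {"username": "alice"}}' \
  http://opa.default.svc.cluster.local:8081/v1/data/hdfs/groups
```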
From fe0289b35a1889fc42e50badd02438263870379d Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 15 Feb 2024 11:44:38 +0100 Subject: [PATCH 14/22] consolidate logging --- .../hadoop/StackableAccessControlEnforcer.java | 8 ++++---- .../tech/stackable/hadoop/StackableAuthorizer.java | 4 ++-- .../stackable/hadoop/StackableGroupMapper.java | 14 +++++++------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java b/src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java index 3388bda..81f7a99 100644 --- a/src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java +++ b/src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java @@ -47,7 +47,7 @@ public class StackableAccessControlEnforcer implements INodeAttributeProvider.Ac private URI opaUri; public StackableAccessControlEnforcer() { - LOG.info("Starting StackableAccessControlEnforcer"); + LOG.debug("Starting StackableAccessControlEnforcer"); // Guaranteed to be only called once (Effective Java: Item 3) Configuration configuration = HadoopConfigSingleton.INSTANCE.getConfiguration(); @@ -78,7 +78,7 @@ public StackableAccessControlEnforcer() { // Otherwise we get com.fasterxml.jackson.databind.JsonMappingException: Infinite recursion (StackOverflowError) .addMixIn(DatanodeDescriptor.class, DatanodeDescriptorMixin.class); - LOG.info("Started HdfsOpaAccessControlEnforcer"); + LOG.debug("Started HdfsOpaAccessControlEnforcer"); } private static class OpaQueryResult { @@ -117,7 +117,7 @@ public void checkPermissionWithContext(INodeAttributeProvider.AuthorizationConte throw new OpaException.SerializeFailed(e); } - LOG.info("Request body [{}]", body); + LOG.debug("Request body: {}", body); HttpResponse response = null; try { response = @@ -127,7 +127,7 @@ public void checkPermissionWithContext(INodeAttributeProvider.AuthorizationConte .POST(HttpRequest.BodyPublishers.ofString(body)) .build(), HttpResponse.BodyHandlers.ofString()); - LOG.debug("Opa response [{}]", response.body()); + LOG.debug("Opa response: {}", response.body()); } catch (Exception e) { LOG.error(e.getMessage()); throw new OpaException.QueryFailed(e); diff --git a/src/main/java/tech/stackable/hadoop/StackableAuthorizer.java b/src/main/java/tech/stackable/hadoop/StackableAuthorizer.java index f6de8e7..8f63e7d 100644 --- a/src/main/java/tech/stackable/hadoop/StackableAuthorizer.java +++ b/src/main/java/tech/stackable/hadoop/StackableAuthorizer.java @@ -11,12 +11,12 @@ public class StackableAuthorizer extends INodeAttributeProvider { @Override public void start() { - LOG.info("Starting HdfsOpaAuthorizer"); + LOG.debug("Starting HdfsOpaAuthorizer"); } @Override public void stop() { - LOG.info("Stopping HdfsOpaAuthorizer"); + LOG.debug("Stopping HdfsOpaAuthorizer"); } @Override diff --git a/src/main/java/tech/stackable/hadoop/StackableGroupMapper.java b/src/main/java/tech/stackable/hadoop/StackableGroupMapper.java index ca0b916..b1ee34a 100644 --- a/src/main/java/tech/stackable/hadoop/StackableGroupMapper.java +++ b/src/main/java/tech/stackable/hadoop/StackableGroupMapper.java @@ -46,7 +46,7 @@ public StackableGroupMapper() { throw new OpaException.UriInvalid(opaUri, e); } - LOG.debug("OPA mapping URL [{}]", opaMappingUrl); + LOG.debug("OPA mapping URL: {}", opaMappingUrl); this.json = new ObjectMapper() @@ -71,7 +71,7 @@ private static class OpaQueryResult { */ @Override public List getGroups(String user) { - LOG.info("Calling StackableGroupMapper.getGroups 
for user [{}]", user); + LOG.info("Calling StackableGroupMapper.getGroups for user \"{}\"", user); OpaGroupsQuery query = new OpaGroupsQuery(new OpaGroupsQuery.OpaGroupsQueryInput(user)); @@ -82,7 +82,7 @@ public List getGroups(String user) { throw new OpaException.SerializeFailed(e); } - LOG.debug("Request body [{}]", body); + LOG.debug("Request body: {}", body); HttpResponse response = null; try { response = @@ -92,7 +92,7 @@ public List getGroups(String user) { .POST(HttpRequest.BodyPublishers.ofString(body)) .build(), HttpResponse.BodyHandlers.ofString()); - LOG.debug("Opa response [{}]", response.body()); + LOG.debug("Opa response: {}", response.body()); } catch (Exception e) { LOG.error(e.getMessage()); throw new OpaException.QueryFailed(e); @@ -115,7 +115,7 @@ public List getGroups(String user) { } List groups = result.result; - LOG.debug("Groups for [{}]: [{}]", user, groups); + LOG.debug("Groups for \"{}\": {}", user, groups); return groups; } @@ -124,7 +124,7 @@ public List getGroups(String user) { @Override public void cacheGroupsRefresh() { // does nothing in this provider of user to groups mapping - LOG.info("ignoring cacheGroupsRefresh: caching should be provided by the policy provider"); + LOG.debug("ignoring cacheGroupsRefresh: caching should be provided by the policy provider"); } /** @@ -135,7 +135,7 @@ public void cacheGroupsRefresh() { @Override public void cacheGroupsAdd(List groups) { // does nothing in this provider of user to groups mapping - LOG.info( + LOG.debug( "ignoring cacheGroupsAdd for groups [{}]: caching should be provided by the policy provider", groups); } From 88c8331a2acdca22ea169e93337860e2948fccdd Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 15 Feb 2024 15:04:09 +0100 Subject: [PATCH 15/22] Update rego rules to include all operations know to human kind --- rego/hdfs.rego | 113 +++++++++++++++++++++++++++++++++++++++++--- rego/hdfs_test.rego | 26 +++++----- 2 files changed, 120 insertions(+), 19 deletions(-) diff --git a/rego/hdfs.rego b/rego/hdfs.rego index 2c13e43..d9ac8b4 100644 --- a/rego/hdfs.rego +++ b/rego/hdfs.rego @@ -65,16 +65,117 @@ action_hierarchy := { "ro": ["ro"], } +# To get a (hopefully complete) list of actions run "ack 'String operationName = '" in the hadoop source code action_for_operation := { + # The "rename" operation will be actually called on both - the source and the target location. 
+ # Because of this you need to have rw permissions on the source and target file - which is desired + + "abandonBlock": "rw", + "addCacheDirective": "rw", + "addCachePool": "full", + "addErasureCodingPolicies": "full", + "allowSnapshot": "full", + "append": "rw", + "cancelDelegationToken": "ro", + "checkAccess": "ro", + "clearQuota": "full", + "clearSpaceQuota": "full", + "completeFile": "rw", + "computeSnapshotDiff": "full", + "concat": "rw", + "contentSummary": "ro", + "create": "rw", + "createEncryptionZone": "full", + "createSnapshot": "full", + "createSymlink": "rw", + "delete": "rw", + "deleteSnapshot": "full", + "disableErasureCodingPolicy": "full", + "disallowSnapshot": "full", + "enableErasureCodingPolicy": "full", + "finalizeRollingUpgrade": "full", + "fsck": "full", + "fsckGetBlockLocations": "full", + "fsync": "rw", + "gcDeletedSnapshot": "full", + "getAclStatus": "ro", + "getAdditionalBlock": "ro", + "getAdditionalDatanode": "ro", + "getDelegationToken": "ro", + "getECTopologyResultForPolicies": "ro", + "getErasureCodingCodecs": "ro", + "getErasureCodingPolicies": "ro", + "getErasureCodingPolicy": "ro", + "getEZForPath": "ro", "getfileinfo": "ro", + "getPreferredBlockSize": "ro", + "getStoragePolicy": "ro", + "getXAttrs": "ro", + "isFileClosed": "ro", + "listCacheDirectives": "ro", + "listCachePools": "ro", + "listCorruptFileBlocks": "ro", + "listEncryptionZones": "ro", + "listOpenFiles": "ro", + "listReencryptionStatus": "ro", + "ListSnapshot": "ro", # Yeah, this really starts with a capital letter + "listSnapshottableDirectory": "ro", "listStatus": "ro", - "open": "ro", + "listXAttrs": "ro", + "metaSave": "rw", "mkdirs": "rw", - "create": "rw", - "delete": "rw", - # The "rename" operation will be actually called on both - the source and the target location. 
- # Because of this you need to have rw permissions on the source and target file - which is desired + "modifyAclEntries": "full", + "modifyCacheDirective": "rw", + "modifyCachePool": "full", + "open": "ro", + "queryRollingUpgrade": "ro", + "quotaUsage": "ro", + "recoverLease": "full", + "reencryptEncryptionZone": "full", + "removeAcl": "full", + "removeAclEntries": "full", + "removeCacheDirective": "rw", + "removeCachePool": "full", + "removeDefaultAcl": "full", + "removeErasureCodingPolicy": "full", + "removeXAttr": "rw", "rename": "rw", + "renameSnapshot": "full", + "renewDelegationToken": "ro", + "satisfyStoragePolicy": "full", + "setAcl": "full", + "setErasureCodingPolicy": "full", + "setOwner": "full", + "setPermission": "full", + "setQuota": "full", + "setReplication": "full", + "setSpaceQuota": "full", + "setStoragePolicy": "full", + "setTimes": "rw", + "setXAttr": "rw", + "startRollingUpgrade": "full", + "truncate": "rw", + "unsetErasureCodingPolicy": "full", + "unsetStoragePolicy": "full", +} + +# Actions I think are only relevant for the whole filesystem, and not specific to a file or directory +admin_actions := { + "checkRestoreFailedStorage": "ro", + "datanodeReport": "ro", + "disableRestoreFailedStorage": "full", + "enableRestoreFailedStorage": "full", + "finalizeUpgrade": "rw", + "getDatanodeStorageReport": "ro", + "monitorHealth": "ro", + "refreshNodes": "rw", + "rollEditLog": "rw", + "saveNamespace": "full", + "setBalancerBandwidth": "rw", + "slowDataNodesReport": "ro", + "transitionToActive": "full", + "transitionToObserver": "full", + "transitionToStandby": "full", } groups_for_user := {"admin": ["admins"], "alice": ["developers"], "bob": []} @@ -82,7 +183,7 @@ groups_for_user := {"admin": ["admins"], "alice": ["developers"], "bob": []} acls := [ { "identity": "group:admins", - "action": "rw", + "action": "full", "resource": "hdfs:dir:/", }, { diff --git a/rego/hdfs_test.rego b/rego/hdfs_test.rego index c55af92..d8740df 100644 --- a/rego/hdfs_test.rego +++ b/rego/hdfs_test.rego @@ -8,7 +8,7 @@ test_admin_access_to_slash if { "shortUserName": "admin" }, "path": "/top-level", - "operationName": "delete", + "operationName": "setErasureCodingPolicy", } } @@ -18,7 +18,7 @@ test_admin_access_to_alice if { "shortUserName": "admin" }, "path": "/alice/file", - "operationName": "delete", + "operationName": "create", } } @@ -29,7 +29,7 @@ test_admin_access_to_alice_nested_file if { "shortUserName": "admin" }, "path": "/alice/nested/file", - "operationName": "delete", + "operationName": "create", } } @@ -39,7 +39,7 @@ test_admin_access_to_developers if { "shortUserName": "admin" }, "path": "/developers/file", - "operationName": "getfileinfo", + "operationName": "create", } } @@ -51,7 +51,7 @@ test_alice_access_to_alice_folder if { "shortUserName": "alice" }, "path": "/alice", - "operationName": "getfileinfo", + "operationName": "create", } } @@ -61,7 +61,7 @@ test_alice_access_to_alice if { "shortUserName": "alice" }, "path": "/alice/file", - "operationName": "delete", + "operationName": "create", } } @@ -71,7 +71,7 @@ test_alice_no_access_to_bob if { "shortUserName": "alice" }, "path": "/bob/file", - "operationName": "delete", + "operationName": "open", } } @@ -81,7 +81,7 @@ test_alice_access_to_developers if { "shortUserName": "alice" }, "path": "/developers/file", - "operationName": "delete", + "operationName": "create", } } @@ -95,7 +95,7 @@ test_bob_no_access_to_alice if { "shortUserName": "bob" }, "path": "/alice/file", - "operationName": "delete", + "operationName": "open", } 
} @@ -105,7 +105,7 @@ test_bob_access_to_bob if { "shortUserName": "bob" }, "path": "/bob/file", - "operationName": "delete", + "operationName": "create", } } @@ -115,7 +115,7 @@ test_bob_ro_access_to_developers if { "shortUserName": "bob" }, "path": "/developers/file", - "operationName": "getfileinfo", + "operationName": "open", } } @@ -125,7 +125,7 @@ test_bob_no_rw_access_to_developers if { "shortUserName": "bob" }, "path": "/developers/file", - "operationName": "delete", + "operationName": "create", } } @@ -135,6 +135,6 @@ test_bob_rw_access_to_developers_special_file if { "shortUserName": "bob" }, "path": "/developers/file-from-bob", - "operationName": "listStatus", # FIXME: Change to operation that needs rw action + "operationName": "create", } } From ef5478f3108a42c0ef5fc08aa01815abce28fc04 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 15 Feb 2024 15:19:54 +0100 Subject: [PATCH 16/22] Update test setup rego rules --- test/stack/11-rego-rules.yaml | 119 +++++++++++++++++++++++++++++++--- 1 file changed, 110 insertions(+), 9 deletions(-) diff --git a/test/stack/11-rego-rules.yaml b/test/stack/11-rego-rules.yaml index 5e6c30b..7ec02fe 100644 --- a/test/stack/11-rego-rules.yaml +++ b/test/stack/11-rego-rules.yaml @@ -30,9 +30,9 @@ data: # ] # ... groups := {group | - raw = groups_for_user[input.username][_] - # Keycloak groups have trailing slashes - group := trim_prefix(raw, "/") + raw = groups_for_user[input.username][_] + # Keycloak groups have trailing slashes + group := trim_prefix(raw, "/") } # Identity mentions the user explicitly @@ -74,16 +74,117 @@ data: "ro": ["ro"], } + # To get a (hopefully complete) list of actions run "ack 'String operationName = '" in the hadoop source code action_for_operation := { + # The "rename" operation will be actually called on both - the source and the target location. 
+ # Because of this you need to have rw permissions on the source and target file - which is desired + + "abandonBlock": "rw", + "addCacheDirective": "rw", + "addCachePool": "full", + "addErasureCodingPolicies": "full", + "allowSnapshot": "full", + "append": "rw", + "cancelDelegationToken": "ro", + "checkAccess": "ro", + "clearQuota": "full", + "clearSpaceQuota": "full", + "completeFile": "rw", + "computeSnapshotDiff": "full", + "concat": "rw", + "contentSummary": "ro", + "create": "rw", + "createEncryptionZone": "full", + "createSnapshot": "full", + "createSymlink": "rw", + "delete": "rw", + "deleteSnapshot": "full", + "disableErasureCodingPolicy": "full", + "disallowSnapshot": "full", + "enableErasureCodingPolicy": "full", + "finalizeRollingUpgrade": "full", + "fsck": "full", + "fsckGetBlockLocations": "full", + "fsync": "rw", + "gcDeletedSnapshot": "full", + "getAclStatus": "ro", + "getAdditionalBlock": "ro", + "getAdditionalDatanode": "ro", + "getDelegationToken": "ro", + "getECTopologyResultForPolicies": "ro", + "getErasureCodingCodecs": "ro", + "getErasureCodingPolicies": "ro", + "getErasureCodingPolicy": "ro", + "getEZForPath": "ro", "getfileinfo": "ro", + "getPreferredBlockSize": "ro", + "getStoragePolicy": "ro", + "getXAttrs": "ro", + "isFileClosed": "ro", + "listCacheDirectives": "ro", + "listCachePools": "ro", + "listCorruptFileBlocks": "ro", + "listEncryptionZones": "ro", + "listOpenFiles": "ro", + "listReencryptionStatus": "ro", + "ListSnapshot": "ro", # Yeah, this really starts with a capital letter + "listSnapshottableDirectory": "ro", "listStatus": "ro", - "open": "ro", + "listXAttrs": "ro", + "metaSave": "rw", "mkdirs": "rw", - "create": "rw", - "delete": "rw", - # The "rename" operation will be actually called on both - the source and the target location. 
- # Because of this you need to have rw permissions on the source and target file - which is desired + "modifyAclEntries": "full", + "modifyCacheDirective": "rw", + "modifyCachePool": "full", + "open": "ro", + "queryRollingUpgrade": "ro", + "quotaUsage": "ro", + "recoverLease": "full", + "reencryptEncryptionZone": "full", + "removeAcl": "full", + "removeAclEntries": "full", + "removeCacheDirective": "rw", + "removeCachePool": "full", + "removeDefaultAcl": "full", + "removeErasureCodingPolicy": "full", + "removeXAttr": "rw", "rename": "rw", + "renameSnapshot": "full", + "renewDelegationToken": "ro", + "satisfyStoragePolicy": "full", + "setAcl": "full", + "setErasureCodingPolicy": "full", + "setOwner": "full", + "setPermission": "full", + "setQuota": "full", + "setReplication": "full", + "setSpaceQuota": "full", + "setStoragePolicy": "full", + "setTimes": "rw", + "setXAttr": "rw", + "startRollingUpgrade": "full", + "truncate": "rw", + "unsetErasureCodingPolicy": "full", + "unsetStoragePolicy": "full", + } + + # Actions I think are only relevant for the whole filesystem, and not specific to a file or directory + admin_actions := { + "checkRestoreFailedStorage": "ro", + "datanodeReport": "ro", + "disableRestoreFailedStorage": "full", + "enableRestoreFailedStorage": "full", + "finalizeUpgrade": "rw", + "getDatanodeStorageReport": "ro", + "monitorHealth": "ro", + "refreshNodes": "rw", + "rollEditLog": "rw", + "saveNamespace": "full", + "setBalancerBandwidth": "rw", + "slowDataNodesReport": "ro", + "transitionToActive": "full", + "transitionToObserver": "full", + "transitionToStandby": "full", } groups_for_user := {"admin": ["admins"], "alice": ["developers"], "bob": []} @@ -91,7 +192,7 @@ data: acls := [ { "identity": "group:admins", - "action": "rw", + "action": "full", "resource": "hdfs:dir:/", }, { From c58c2fcf700116c3eadb7f0dbce81cdc653043e7 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Fri, 16 Feb 2024 11:39:46 +0100 Subject: [PATCH 17/22] rename hdfs-group-mapper -> hdfs-utils --- Dockerfile | 2 +- README.md | 2 +- pom.xml | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Dockerfile b/Dockerfile index 28d5d15..b6d0518 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,3 +1,3 @@ FROM docker.stackable.tech/stackable/hadoop:3.3.6-stackable0.0.0-dev -COPY --chown=stackable:stackable ./hdfs-group-mapper-0.1.0-SNAPSHOT.jar /stackable/hadoop/share/hadoop/tools/lib/ \ No newline at end of file +COPY --chown=stackable:stackable ./hdfs-utils-0.1.0-SNAPSHOT.jar /stackable/hadoop/share/hadoop/tools/lib/ diff --git a/README.md b/README.md index ca4d442..fb61d37 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ This project contains multiple plugins for Apache Hadoop, which are intended to 3. Not (yet?) in this repository is a [TopologyProvider](https://github.com/stackabletech/hdfs-topology-provider/). ## Installation -Run `mvn package` and put the resulting `target/hdfs-group-mapper-0.1.0-SNAPSHOT.jar` file on your HDFS classpath. +Run `mvn package` and put the resulting `target/hdfs-utils-0.1.0-SNAPSHOT.jar` file on your HDFS classpath. The easiest way to achieve this is to put it in the directory `/stackable/hadoop/share/hadoop/tools/lib/`. The Stackable HDFS already takes care of this, you don't need to do anything in this case. 
diff --git a/pom.xml b/pom.xml index a8d6edc..ae1759c 100644 --- a/pom.xml +++ b/pom.xml @@ -3,11 +3,11 @@ 4.0.0 tech.stackable - hdfs-group-mapper + hdfs-utils 0.1.0-SNAPSHOT - Apache Hadoop HDFS OpenPolicyAgent Group Mapper - https://github.com/stackabletech/hdfs-group-mapper/ + Apache Hadoop HDFS utils + https://github.com/stackabletech/hdfs-utils/ Stackable GmbH https://stackable.tech @@ -21,11 +21,11 @@ scm:git:${project.scm.url} scm:git:${project.scm.url} HEAD - git@github.com:stackabletech/hdfs-group-mapper.git + git@github.com:stackabletech/hdfs-utils.git GitHub - https://github.com/stackabletech/hdfs-group-mapper/issues + https://github.com/stackabletech/hdfs-utils/issues From c1aaaf695d6709af3a4e2e695248f7cddf258f9d Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Fri, 16 Feb 2024 12:55:54 +0100 Subject: [PATCH 18/22] fix API docs --- README.md | 12 ++++-------- rego/hdfs.rego | 9 +-------- test/stack/11-rego-rules.yaml | 9 +-------- 3 files changed, 6 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index fb61d37..ac716fb 100644 --- a/README.md +++ b/README.md @@ -610,14 +610,10 @@ OPA needs to respond with the list of groups as follows: ```json { - "result": { - "groups": { - "groups": [ - "admin", - "developers" - ] - } - } + "result": [ + "admin", + "developers" + ] } ``` diff --git a/rego/hdfs.rego b/rego/hdfs.rego index d9ac8b4..2586658 100644 --- a/rego/hdfs.rego +++ b/rego/hdfs.rego @@ -12,14 +12,7 @@ allow if { action_sufficient_for_operation(acl.action, input.operationName) } -# HDFS group mapper -# This will return the group data in this form: -# "result": { -# "groups": [ -# "admin", -# "developers" -# ] -# ... +# HDFS group mapper (this returns a list of strings) groups := {group | raw = groups_for_user[input.username][_] # Keycloak groups have trailing slashes diff --git a/test/stack/11-rego-rules.yaml b/test/stack/11-rego-rules.yaml index 7ec02fe..ea218e3 100644 --- a/test/stack/11-rego-rules.yaml +++ b/test/stack/11-rego-rules.yaml @@ -21,14 +21,7 @@ data: action_sufficient_for_operation(acl.action, input.operationName) } - # HDFS group mapper - # This will return the group data in this form: - # "result": { - # "groups": [ - # "admin", - # "developers" - # ] - # ... 
+ # HDFS group mapper (this returns a list of strings) groups := {group | raw = groups_for_user[input.username][_] # Keycloak groups have trailing slashes From f6bae85c791f7da9cbda078e8d5ad29cf1f4144b Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Fri, 16 Feb 2024 14:29:35 +0100 Subject: [PATCH 19/22] update rego rules with feedback from Lars --- rego/hdfs.rego | 4 ++-- test/stack/11-rego-rules.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/rego/hdfs.rego b/rego/hdfs.rego index 2586658..0da9f31 100644 --- a/rego/hdfs.rego +++ b/rego/hdfs.rego @@ -53,7 +53,7 @@ action_sufficient_for_operation(action, operation) if { } action_hierarchy := { - "full": ["full", "rw","ro"], + "full": ["full", "rw", "ro"], "rw": ["rw", "ro"], "ro": ["ro"], } @@ -115,7 +115,6 @@ action_for_operation := { "listSnapshottableDirectory": "ro", "listStatus": "ro", "listXAttrs": "ro", - "metaSave": "rw", "mkdirs": "rw", "modifyAclEntries": "full", "modifyCacheDirective": "rw", @@ -160,6 +159,7 @@ admin_actions := { "enableRestoreFailedStorage": "full", "finalizeUpgrade": "rw", "getDatanodeStorageReport": "ro", + "metaSave": "ro", "monitorHealth": "ro", "refreshNodes": "rw", "rollEditLog": "rw", diff --git a/test/stack/11-rego-rules.yaml b/test/stack/11-rego-rules.yaml index ea218e3..c8ed269 100644 --- a/test/stack/11-rego-rules.yaml +++ b/test/stack/11-rego-rules.yaml @@ -62,7 +62,7 @@ data: } action_hierarchy := { - "full": ["full", "rw","ro"], + "full": ["full", "rw", "ro"], "rw": ["rw", "ro"], "ro": ["ro"], } @@ -124,7 +124,6 @@ data: "listSnapshottableDirectory": "ro", "listStatus": "ro", "listXAttrs": "ro", - "metaSave": "rw", "mkdirs": "rw", "modifyAclEntries": "full", "modifyCacheDirective": "rw", @@ -169,6 +168,7 @@ data: "enableRestoreFailedStorage": "full", "finalizeUpgrade": "rw", "getDatanodeStorageReport": "ro", + "metaSave": "ro", "monitorHealth": "ro", "refreshNodes": "rw", "rollEditLog": "rw", From a7930acc129f9c4bd08d3ea79cd19315a93f8e5d Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Fri, 16 Feb 2024 14:30:03 +0100 Subject: [PATCH 20/22] pretty-print request body --- .../hadoop/StackableAccessControlEnforcer.java | 9 ++++++++- test/stack/20-hdfs.yaml | 11 +++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java b/src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java index 81f7a99..dd29054 100644 --- a/src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java +++ b/src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java @@ -117,7 +117,14 @@ public void checkPermissionWithContext(INodeAttributeProvider.AuthorizationConte throw new OpaException.SerializeFailed(e); } - LOG.debug("Request body: {}", body); + String prettyPrinted; + try { + prettyPrinted = json.writerWithDefaultPrettyPrinter().writeValueAsString(query); + } catch (JsonProcessingException e) { + throw new OpaException.SerializeFailed(e); + } + + LOG.debug("Request body:\n{}", prettyPrinted); HttpResponse response = null; try { response = diff --git a/test/stack/20-hdfs.yaml b/test/stack/20-hdfs.yaml index fae8fb8..d4452e6 100644 --- a/test/stack/20-hdfs.yaml +++ b/test/stack/20-hdfs.yaml @@ -50,6 +50,17 @@ spec: # https://github.com/stackabletech/hdfs-operator/blob/main/rust/operator-binary/src/kerberos.rs#L97-L101 # This should be removed so that the mapping implementation can provide this information instead: hadoop.user.group.static.mapping.overrides: "" + 
config: + logging: + containers: + hdfs: + console: + level: DEBUG + loggers: + ROOT: + level: INFO + tech.stackable.hadoop: + level: DEBUG roleGroups: default: replicas: 2 From 0eaf5bc3e78b67556583b187ca835a5caf637672 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Fri, 16 Feb 2024 14:31:01 +0100 Subject: [PATCH 21/22] Apply suggestions from code review Co-authored-by: Nick --- test/stack/20-hdfs.yaml | 4 ++-- test/stack/30-test-hdfs-permissions.yaml | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/test/stack/20-hdfs.yaml b/test/stack/20-hdfs.yaml index d4452e6..847a88e 100644 --- a/test/stack/20-hdfs.yaml +++ b/test/stack/20-hdfs.yaml @@ -32,9 +32,9 @@ spec: dfsReplication: 1 zookeeperConfigMapName: simple-hdfs-znode authentication: - tlsSecretClass: tls + tlsSecretClass: tls # pragma: allowlist secret kerberos: - secretClass: kerberos-default + secretClass: kerberos-default # pragma: allowlist secret nameNodes: envOverrides: &envOverrides HADOOP_CLASSPATH: "/stackable/hadoop/share/hadoop/tools/lib/*.jar" diff --git a/test/stack/30-test-hdfs-permissions.yaml b/test/stack/30-test-hdfs-permissions.yaml index 951ab46..d2f8e6c 100644 --- a/test/stack/30-test-hdfs-permissions.yaml +++ b/test/stack/30-test-hdfs-permissions.yaml @@ -1,3 +1,4 @@ +--- apiVersion: batch/v1 kind: Job metadata: From 7e0ef4bb12e1bdf50a2780a614748b964d4d848e Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Fri, 16 Feb 2024 14:49:03 +0100 Subject: [PATCH 22/22] add error logging --- .../tech/stackable/hadoop/StackableAccessControlEnforcer.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java b/src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java index dd29054..acad300 100644 --- a/src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java +++ b/src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java @@ -121,6 +121,7 @@ public void checkPermissionWithContext(INodeAttributeProvider.AuthorizationConte try { prettyPrinted = json.writerWithDefaultPrettyPrinter().writeValueAsString(query); } catch (JsonProcessingException e) { + LOG.error("Could not pretty print the following request body (but non-pretty print did work): {}", body); throw new OpaException.SerializeFailed(e); }
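For debugging the authorizer rego rules it can likewise help to query the allow endpoint by hand with a trimmed-down input that mirrors the fields used by the unit tests in `rego/hdfs_test.rego` (`callerUgi.shortUserName`, `path` and `operationName`). This is only a sketch, not the full request the enforcer sends (the real request additionally carries the inode attributes and caller context shown in the README example), but it is usually enough to check whether an ACL matches. Again it assumes the query is run from inside the cluster so the OPA service DNS name resolves.

```bash
# Illustrative manual query against the allow endpoint used by the authorizer.
# The input below only contains the fields the example rego rules evaluate;
# the enforcer itself sends the full AuthorizationContext as the request body.
curl -s \
  -H 'Content-Type: application/json' \
  -d '{"input": {"callerUgi": {"shortUserName": "alice"}, "path": "/developers/file", "operationName": "create"}}' \
  http://opa.default.svc.cluster.local:8081/v1/data/hdfs/allow
```

With the example ACLs and group mappings from this repository, this query should return `{"result": true}`, since alice is mapped to the `developers` group.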