diff --git a/Dockerfile b/Dockerfile
index 28d5d15..b6d0518 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,3 +1,3 @@
FROM docker.stackable.tech/stackable/hadoop:3.3.6-stackable0.0.0-dev
-COPY --chown=stackable:stackable ./hdfs-group-mapper-0.1.0-SNAPSHOT.jar /stackable/hadoop/share/hadoop/tools/lib/
\ No newline at end of file
+COPY --chown=stackable:stackable ./hdfs-utils-0.1.0-SNAPSHOT.jar /stackable/hadoop/share/hadoop/tools/lib/
diff --git a/README.md b/README.md
index 8a3a13f..ac716fb 100644
--- a/README.md
+++ b/README.md
@@ -1,80 +1,623 @@
-# Stackable Group Mapper for Apache Hadoop
+# Stackable Apache Hadoop utilities
[Stackable Data Platform](https://stackable.tech/) | [Platform Docs](https://docs.stackable.tech/) | [Discussions](https://github.com/orgs/stackabletech/discussions) | [Discord](https://discord.gg/7kZ3BNnCAF)
-This projects is a plugin for Apache Hadoop, which can look up groups for users in an OpenPolicyAgent (OPA) instance.
-It is intended to be used with the [Stackable Data Platform](https://stackable.tech)
+This project contains multiple plugins for Apache Hadoop, which are intended to be used with the [Stackable Data Platform](https://stackable.tech):
-## Description
+1. OpenPolicyAgent (OPA) authorizer: For every action performed in HDFS, an OPA instance is asked whether the user is allowed to perform it.
+2. Group mapper: Looks up group memberships for users from an OPA instance.
+3. Not (yet?) in this repository is a [TopologyProvider](https://github.com/stackabletech/hdfs-topology-provider/).
-HDFS internally uses user groups for group permissions on folders etc. For this reason it is not enough to just have the groups in OPA during authorization, but they actually need to be available to Hadoop. Hadoop offers a few default group providers, such as:
+## Installation
+Run `mvn package` and put the resulting `target/hdfs-utils-0.1.0-SNAPSHOT.jar` file on your HDFS classpath.
+The easiest way to achieve this is to put it in the directory `/stackable/hadoop/share/hadoop/tools/lib/`.
+The Stackable HDFS already takes care of this, so you don't need to do anything in that case.
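+
+A minimal sketch of the manual route described above (the jar name matches the build output, the target directory matches the Stackable image layout):
+
+```bash
+# Build the jar
+mvn package
+# Put it on the HDFS classpath
+cp target/hdfs-utils-0.1.0-SNAPSHOT.jar /stackable/hadoop/share/hadoop/tools/lib/
+```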
-* LDAP
-* Linux user group (usually provided by SSSD or Centrify or similar tools)
+## OPA authorizer
-Hadoop exposes an [interface](https://github.com/apache/hadoop/blob/rel/release-3.3.6/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/GroupMappingServiceProvider.java) that users can implement to extend these [group mappings](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/GroupsMapping.html). The Stackable Group Mapper does this to look up user groups from OPA.
+> [!IMPORTANT]
+> The authorizer only works when used by an HDFS version that includes fixes from https://github.com/apache/hadoop/pull/6553.
+> Stackable HDFS versions starting with `3.3.4` already contain this patch.
-## OPA Mappings
+### Configuration
-OPA mappings are returned from the [User-Info-Fetcher](https://docs.stackable.tech/home/nightly/opa/usage-guide/user-info-fetcher#_example_rego_rule) in this form:
+- Set `dfs.namenode.inode.attributes.provider.class` in `hdfs-site.xml` to `tech.stackable.hadoop.StackableAuthorizer`
+- Set `hadoop.security.authorization.opa.policy.url` in `core-site.xml` to the HTTP endpoint of your OPA rego rule, e.g. `http://opa.default.svc.cluster.local:8081/v1/data/hdfs/allow`
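+
+When running HDFS with the Stackable operator, both settings can be applied via `configOverrides` on the nameNodes role. A minimal sketch (role group name and OPA URL are examples, adapt them to your cluster):
+
+```yaml
+nameNodes:
+  roleGroups:
+    default:
+      configOverrides:
+        hdfs-site.xml:
+          dfs.namenode.inode.attributes.provider.class: "tech.stackable.hadoop.StackableAuthorizer"
+        core-site.xml:
+          hadoop.security.authorization.opa.policy.url: "http://opa.default.svc.cluster.local:8081/v1/data/hdfs/allow"
+```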
-```json
-{
- "id": "af07f12c-a2db-40a7-93e0-874537bdf3f5",
- "username": "alice",
- "groups": [
- "/superset-admin"
- ],
- "customAttributes": {}
-}
-```
+### API
+
+For every action, a request similar to the following is sent to OPA:
-The Group Mapper only needs the group listing, which can be requested specifically from the Opa server by providing the current user and filtering out the groups with the `json.filter` function, returning a segment that looks like this:
+
+Example request
```json
{
- "result": {
- "groups": {
+ "input": {
+ "fsOwner": "nn",
+ "supergroup": "supergroup",
+ "callerUgi": {
+ "realUser": null,
+ "userName": "alice/test-hdfs-permissions.default.svc.cluster.local@CLUSTER.LOCAL",
+ "shortUserName": "alice",
+ "primaryGroup": "developers",
"groups": [
- "admin",
- "superuser"
- ]
+ "developers"
+ ],
+ "authenticationMethod": "KERBEROS",
+ "realAuthenticationMethod": "KERBEROS"
+ },
+ "inodeAttrs": [
+ {
+ "parent": null,
+ "id": 16385,
+ "modificationTime": 1707990801081,
+ "accessTime": 0,
+ "next": null,
+ "features": [
+ {
+ "spaceConsumed": {
+ "nameSpace": 7,
+ "storageSpace": 1338,
+ "typeSpaces": {}
+ },
+ "spaceAllowed": {
+ "nameSpace": 9223372036854775807,
+ "storageSpace": -1,
+ "typeSpaces": {}
+ }
+ },
+ {
+ "diffs": {
+ "last": null,
+ "lastSnapshotId": 2147483646
+ },
+ "snapshotQuota": 0,
+ "numSnapshots": 0,
+ "snapshotList": [],
+ "lastSnapshotId": 2147483646
+ }
+ ],
+ "storagePolicyID": 0,
+ "directorySnapshottableFeature": {
+ "diffs": {
+ "last": null,
+ "lastSnapshotId": 2147483646
+ },
+ "snapshotQuota": 0,
+ "numSnapshots": 0,
+ "snapshotList": [],
+ "lastSnapshotId": 2147483646
+ },
+ "directoryWithQuotaFeature": {
+ "spaceConsumed": {
+ "nameSpace": 7,
+ "storageSpace": 1338,
+ "typeSpaces": {}
+ },
+ "spaceAllowed": {
+ "nameSpace": 9223372036854775807,
+ "storageSpace": -1,
+ "typeSpaces": {}
+ }
+ },
+ "directoryWithSnapshotFeature": {
+ "diffs": {
+ "last": null,
+ "lastSnapshotId": 2147483646
+ },
+ "snapshotQuota": 0,
+ "numSnapshots": 0,
+ "snapshotList": [],
+ "lastSnapshotId": 2147483646
+ },
+ "quotaCounts": {
+ "nameSpace": 9223372036854775807,
+ "storageSpace": -1,
+ "typeSpaces": {}
+ },
+ "localStoragePolicyID": 0,
+ "diffs": {
+ "last": null,
+ "lastSnapshotId": 2147483646
+ },
+ "localNameBytes": "",
+ "xattrFeature": null,
+ "groupName": "supergroup",
+ "fsPermission": {
+ "stickyBit": false,
+ "userAction": "ALL",
+ "groupAction": "READ_EXECUTE",
+ "otherAction": "READ_EXECUTE",
+ "masked": null,
+ "unmasked": null,
+ "aclBit": false,
+ "encryptedBit": false,
+ "erasureCodedBit": false
+ },
+ "aclFeature": null,
+ "fsPermissionShort": 493,
+ "permissionLong": 1099511693805,
+ "userName": "nn",
+ "localName": "",
+ "key": "",
+ "fullPathName": "/",
+ "pathComponents": [
+ ""
+ ],
+ "objectString": "INodeDirectory@3ae77112",
+ "parentString": "parent=null",
+ "parentReference": null
+ },
+ {
+ "parent": {
+ "parent": null,
+ "id": 16385,
+ "modificationTime": 1707990801081,
+ "accessTime": 0,
+ "next": null,
+ "features": [
+ {
+ "spaceConsumed": {
+ "nameSpace": 7,
+ "storageSpace": 1338,
+ "typeSpaces": {}
+ },
+ "spaceAllowed": {
+ "nameSpace": 9223372036854775807,
+ "storageSpace": -1,
+ "typeSpaces": {}
+ }
+ },
+ {
+ "diffs": {
+ "last": null,
+ "lastSnapshotId": 2147483646
+ },
+ "snapshotQuota": 0,
+ "numSnapshots": 0,
+ "snapshotList": [],
+ "lastSnapshotId": 2147483646
+ }
+ ],
+ "storagePolicyID": 0,
+ "directorySnapshottableFeature": {
+ "diffs": {
+ "last": null,
+ "lastSnapshotId": 2147483646
+ },
+ "snapshotQuota": 0,
+ "numSnapshots": 0,
+ "snapshotList": [],
+ "lastSnapshotId": 2147483646
+ },
+ "directoryWithQuotaFeature": {
+ "spaceConsumed": {
+ "nameSpace": 7,
+ "storageSpace": 1338,
+ "typeSpaces": {}
+ },
+ "spaceAllowed": {
+ "nameSpace": 9223372036854775807,
+ "storageSpace": -1,
+ "typeSpaces": {}
+ }
+ },
+ "directoryWithSnapshotFeature": {
+ "diffs": {
+ "last": null,
+ "lastSnapshotId": 2147483646
+ },
+ "snapshotQuota": 0,
+ "numSnapshots": 0,
+ "snapshotList": [],
+ "lastSnapshotId": 2147483646
+ },
+ "quotaCounts": {
+ "nameSpace": 9223372036854775807,
+ "storageSpace": -1,
+ "typeSpaces": {}
+ },
+ "localStoragePolicyID": 0,
+ "diffs": {
+ "last": null,
+ "lastSnapshotId": 2147483646
+ },
+ "localNameBytes": "",
+ "xattrFeature": null,
+ "groupName": "supergroup",
+ "fsPermission": {
+ "stickyBit": false,
+ "userAction": "ALL",
+ "groupAction": "READ_EXECUTE",
+ "otherAction": "READ_EXECUTE",
+ "masked": null,
+ "unmasked": null,
+ "aclBit": false,
+ "encryptedBit": false,
+ "erasureCodedBit": false
+ },
+ "aclFeature": null,
+ "fsPermissionShort": 493,
+ "permissionLong": 1099511693805,
+ "userName": "nn",
+ "localName": "",
+ "key": "",
+ "fullPathName": "/",
+ "pathComponents": [
+ ""
+ ],
+ "objectString": "INodeDirectory@3ae77112",
+ "parentString": "parent=null",
+ "parentReference": null
+ },
+ "id": 16389,
+ "modificationTime": 1707990801081,
+ "accessTime": 0,
+ "next": null,
+ "features": [],
+ "storagePolicyID": 0,
+ "directorySnapshottableFeature": null,
+ "directoryWithQuotaFeature": null,
+ "directoryWithSnapshotFeature": null,
+ "quotaCounts": {
+ "nameSpace": -1,
+ "storageSpace": -1,
+ "typeSpaces": {}
+ },
+ "localStoragePolicyID": 0,
+ "diffs": null,
+ "localNameBytes": "ZGV2ZWxvcGVycy1ybw==",
+ "xattrFeature": null,
+ "groupName": "supergroup",
+ "fsPermission": {
+ "stickyBit": false,
+ "userAction": "ALL",
+ "groupAction": "READ_EXECUTE",
+ "otherAction": "READ_EXECUTE",
+ "masked": null,
+ "unmasked": null,
+ "aclBit": false,
+ "encryptedBit": false,
+ "erasureCodedBit": false
+ },
+ "aclFeature": null,
+ "fsPermissionShort": 493,
+ "permissionLong": 2199023321581,
+ "userName": "admin",
+ "localName": "developers-ro",
+ "key": "ZGV2ZWxvcGVycy1ybw==",
+ "fullPathName": "/developers-ro",
+ "pathComponents": [
+ "",
+ "ZGV2ZWxvcGVycy1ybw=="
+ ],
+ "objectString": "INodeDirectory@1df11410",
+ "parentString": "parentDir=/",
+ "parentReference": null
+ },
+ null
+ ],
+ "inodes": [
+ {
+ "parent": null,
+ "id": 16385,
+ "modificationTime": 1707990801081,
+ "accessTime": 0,
+ "next": null,
+ "features": [
+ {
+ "spaceConsumed": {
+ "nameSpace": 7,
+ "storageSpace": 1338,
+ "typeSpaces": {}
+ },
+ "spaceAllowed": {
+ "nameSpace": 9223372036854775807,
+ "storageSpace": -1,
+ "typeSpaces": {}
+ }
+ },
+ {
+ "diffs": {
+ "last": null,
+ "lastSnapshotId": 2147483646
+ },
+ "snapshotQuota": 0,
+ "numSnapshots": 0,
+ "snapshotList": [],
+ "lastSnapshotId": 2147483646
+ }
+ ],
+ "storagePolicyID": 0,
+ "directorySnapshottableFeature": {
+ "diffs": {
+ "last": null,
+ "lastSnapshotId": 2147483646
+ },
+ "snapshotQuota": 0,
+ "numSnapshots": 0,
+ "snapshotList": [],
+ "lastSnapshotId": 2147483646
+ },
+ "directoryWithQuotaFeature": {
+ "spaceConsumed": {
+ "nameSpace": 7,
+ "storageSpace": 1338,
+ "typeSpaces": {}
+ },
+ "spaceAllowed": {
+ "nameSpace": 9223372036854775807,
+ "storageSpace": -1,
+ "typeSpaces": {}
+ }
+ },
+ "directoryWithSnapshotFeature": {
+ "diffs": {
+ "last": null,
+ "lastSnapshotId": 2147483646
+ },
+ "snapshotQuota": 0,
+ "numSnapshots": 0,
+ "snapshotList": [],
+ "lastSnapshotId": 2147483646
+ },
+ "quotaCounts": {
+ "nameSpace": 9223372036854775807,
+ "storageSpace": -1,
+ "typeSpaces": {}
+ },
+ "localStoragePolicyID": 0,
+ "diffs": {
+ "last": null,
+ "lastSnapshotId": 2147483646
+ },
+ "localNameBytes": "",
+ "xattrFeature": null,
+ "groupName": "supergroup",
+ "fsPermission": {
+ "stickyBit": false,
+ "userAction": "ALL",
+ "groupAction": "READ_EXECUTE",
+ "otherAction": "READ_EXECUTE",
+ "masked": null,
+ "unmasked": null,
+ "aclBit": false,
+ "encryptedBit": false,
+ "erasureCodedBit": false
+ },
+ "aclFeature": null,
+ "fsPermissionShort": 493,
+ "permissionLong": 1099511693805,
+ "userName": "nn",
+ "localName": "",
+ "key": "",
+ "fullPathName": "/",
+ "pathComponents": [
+ ""
+ ],
+ "objectString": "INodeDirectory@3ae77112",
+ "parentString": "parent=null",
+ "parentReference": null
+ },
+ {
+ "parent": {
+ "parent": null,
+ "id": 16385,
+ "modificationTime": 1707990801081,
+ "accessTime": 0,
+ "next": null,
+ "features": [
+ {
+ "spaceConsumed": {
+ "nameSpace": 7,
+ "storageSpace": 1338,
+ "typeSpaces": {}
+ },
+ "spaceAllowed": {
+ "nameSpace": 9223372036854775807,
+ "storageSpace": -1,
+ "typeSpaces": {}
+ }
+ },
+ {
+ "diffs": {
+ "last": null,
+ "lastSnapshotId": 2147483646
+ },
+ "snapshotQuota": 0,
+ "numSnapshots": 0,
+ "snapshotList": [],
+ "lastSnapshotId": 2147483646
+ }
+ ],
+ "storagePolicyID": 0,
+ "directorySnapshottableFeature": {
+ "diffs": {
+ "last": null,
+ "lastSnapshotId": 2147483646
+ },
+ "snapshotQuota": 0,
+ "numSnapshots": 0,
+ "snapshotList": [],
+ "lastSnapshotId": 2147483646
+ },
+ "directoryWithQuotaFeature": {
+ "spaceConsumed": {
+ "nameSpace": 7,
+ "storageSpace": 1338,
+ "typeSpaces": {}
+ },
+ "spaceAllowed": {
+ "nameSpace": 9223372036854775807,
+ "storageSpace": -1,
+ "typeSpaces": {}
+ }
+ },
+ "directoryWithSnapshotFeature": {
+ "diffs": {
+ "last": null,
+ "lastSnapshotId": 2147483646
+ },
+ "snapshotQuota": 0,
+ "numSnapshots": 0,
+ "snapshotList": [],
+ "lastSnapshotId": 2147483646
+ },
+ "quotaCounts": {
+ "nameSpace": 9223372036854775807,
+ "storageSpace": -1,
+ "typeSpaces": {}
+ },
+ "localStoragePolicyID": 0,
+ "diffs": {
+ "last": null,
+ "lastSnapshotId": 2147483646
+ },
+ "localNameBytes": "",
+ "xattrFeature": null,
+ "groupName": "supergroup",
+ "fsPermission": {
+ "stickyBit": false,
+ "userAction": "ALL",
+ "groupAction": "READ_EXECUTE",
+ "otherAction": "READ_EXECUTE",
+ "masked": null,
+ "unmasked": null,
+ "aclBit": false,
+ "encryptedBit": false,
+ "erasureCodedBit": false
+ },
+ "aclFeature": null,
+ "fsPermissionShort": 493,
+ "permissionLong": 1099511693805,
+ "userName": "nn",
+ "localName": "",
+ "key": "",
+ "fullPathName": "/",
+ "pathComponents": [
+ ""
+ ],
+ "objectString": "INodeDirectory@3ae77112",
+ "parentString": "parent=null",
+ "parentReference": null
+ },
+ "id": 16389,
+ "modificationTime": 1707990801081,
+ "accessTime": 0,
+ "next": null,
+ "features": [],
+ "storagePolicyID": 0,
+ "directorySnapshottableFeature": null,
+ "directoryWithQuotaFeature": null,
+ "directoryWithSnapshotFeature": null,
+ "quotaCounts": {
+ "nameSpace": -1,
+ "storageSpace": -1,
+ "typeSpaces": {}
+ },
+ "localStoragePolicyID": 0,
+ "diffs": null,
+ "localNameBytes": "ZGV2ZWxvcGVycy1ybw==",
+ "xattrFeature": null,
+ "groupName": "supergroup",
+ "fsPermission": {
+ "stickyBit": false,
+ "userAction": "ALL",
+ "groupAction": "READ_EXECUTE",
+ "otherAction": "READ_EXECUTE",
+ "masked": null,
+ "unmasked": null,
+ "aclBit": false,
+ "encryptedBit": false,
+ "erasureCodedBit": false
+ },
+ "aclFeature": null,
+ "fsPermissionShort": 493,
+ "permissionLong": 2199023321581,
+ "userName": "admin",
+ "localName": "developers-ro",
+ "key": "ZGV2ZWxvcGVycy1ybw==",
+ "fullPathName": "/developers-ro",
+ "pathComponents": [
+ "",
+ "ZGV2ZWxvcGVycy1ybw=="
+ ],
+ "objectString": "INodeDirectory@1df11410",
+ "parentString": "parentDir=/",
+ "parentReference": null
+ },
+ null
+ ],
+ "pathByNameArr": [
+ "",
+ "ZGV2ZWxvcGVycy1ybw==",
+ "aG9zdHMuX0NPUFlJTkdf"
+ ],
+ "snapshotId": 2147483646,
+ "path": "/developers-ro/hosts._COPYING_",
+ "ancestorIndex": 1,
+ "doCheckOwner": false,
+ "ancestorAccess": null,
+ "parentAccess": null,
+ "access": null,
+ "subAccess": null,
+ "ignoreEmptyDir": false,
+ "operationName": "getfileinfo",
+ "callerContext": {
+ "context": "CLI",
+ "signature": null
}
}
}
```
-The leading slash is required by Opa/Keycloak to allow the definition of subgroups, but this should be removed by the rego rule before returning this list of strings to the mapper.
-
-## Configuration
+
-Group mappings are resolved on the NameNode and the following configuration should be added to the NameNode role:
+## Group mapper
-### envOverrides
+Despite having the OPA authorizer described above, there are a few use cases for a group mapper as well:
-#### HADOOP_CLASSPATH
+1. Correctly showing group information in HDFS, e.g. for file ownership.
+2. Using only the group mapper without the OPA authorizer.
-* Fixed value of `"/stackable/hadoop/share/hadoop/tools/lib/*.jar"`
+Hadoop offers a few default group providers, such as:
-### configOverrides / `core-site.xml`
+* LDAP
+* Linux user groups (usually provided by SSSD, Centrify or similar tools)
-#### hadoop.security.group.mapping
+Hadoop exposes an [interface](https://github.com/apache/hadoop/blob/rel/release-3.3.6/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/GroupMappingServiceProvider.java) that users can implement to extend these [group mappings](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/GroupsMapping.html). The Stackable Group Mapper does this to look up user groups from OPA.
-* Fixed value of `"tech.stackable.hadoop.StackableGroupMapper"`
+### Configuration
-#### hadoop.security.group.mapping.opa.url
+- Set `hadoop.security.group.mapping` in `core-site.xml` to `tech.stackable.hadoop.StackableGroupMapper`
+- Set `hadoop.security.group.mapping.opa.policy.url` in `core-site.xml` to the HTTP endpoint of your OPA rego rule, e.g. `http://opa.default.svc.cluster.local:8081/v1/data/hdfs/groups`
+- Make sure `hadoop.security.group.mapping.opa.policy.url` is reachable and that `hadoop.user.group.static.mapping.overrides` is not set in `core-site.xml`, as it clashes with the information the group mapper provides.
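+
+With the Stackable operator this again translates to `configOverrides` on the nameNodes role. A minimal sketch (the empty static mapping override clears the default that the hdfs-operator adds when Kerberos is enabled):
+
+```yaml
+nameNodes:
+  roleGroups:
+    default:
+      configOverrides:
+        core-site.xml:
+          hadoop.security.group.mapping: "tech.stackable.hadoop.StackableGroupMapper"
+          hadoop.security.group.mapping.opa.policy.url: "http://opa.default.svc.cluster.local:8081/v1/data/hdfs/groups"
+          # Clear the static mapping so the group mapper is actually consulted
+          hadoop.user.group.static.mapping.overrides: ""
+```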
-* The Opa Server endpoint e.g. `"http://test-opa.default.svc.cluster.local:8081/v1/data/hdfsgroups"`
+### API
-#### hadoop.security.group.mapping.opa.list.name
+The group mapper sends the following query to OPA:
-* Opa responses have a [root field](https://www.openpolicyagent.org/docs/latest/rest-api/#response-message) called `result`: the result itself - in this case of a list of user groups - is a top-level field within the root field and is configurable i.e. the group mapper just needs to look up this field from the response and this is passed in the configuration. This means that both the output format of the rego rule and the corresponding response field are configured independently of the group mapper.
+```json
+{
+ "input": {
+ "username": "alice"
+ }
+}
+```
-#### hadoop.user.group.static.mapping.overrides
+OPA needs to respond with the list of groups as follows:
-* The hdfs-operator will add a default static mapping whenever kerberos is activated. This should be removed so that the mapping implementation can provide this information instead: i.e. with an empty string `""`
+```json
+{
+ "result": [
+ "admin",
+ "developers"
+ ]
+}
+```
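+
+You can check the rego rule by hand by querying OPA directly. With the example rules from `rego/hdfs.rego` loaded and the URL from the configuration above, something like this should work:
+
+```bash
+curl -s -H 'Content-Type: application/json' \
+  -d '{"input": {"username": "alice"}}' \
+  http://opa.default.svc.cluster.local:8081/v1/data/hdfs/groups
+# Expected with the example rules: {"result": ["developers"]}
+```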
-## Testing
+### Testing
CRDs for spinning up test infrastructure are provided in `test/stack`. The Tiltfile will deploy these resources, build and copy the mapper to the docker image, and re-deploy the image to the running HdfsCluster.
diff --git a/Tiltfile b/Tiltfile
index 63fed7c..a44c8a0 100644
--- a/Tiltfile
+++ b/Tiltfile
@@ -1,7 +1,8 @@
k8s_yaml('test/stack/01-install-krb5-kdc.yaml')
k8s_yaml('test/stack/02-create-kerberos-secretclass.yaml')
-k8s_yaml('test/stack/05-opa.yaml')
-k8s_yaml('test/stack/10-hdfs.yaml')
+k8s_yaml('test/stack/10-opa.yaml')
+k8s_yaml('test/stack/11-rego-rules.yaml')
+k8s_yaml('test/stack/20-hdfs.yaml')
local_resource(
'compile authorizer',
@@ -13,4 +14,4 @@ docker_build(
'./target',
dockerfile='./Dockerfile')
-k8s_kind('HdfsCluster', image_json_path='{.spec.image.custom}')
\ No newline at end of file
+k8s_kind('HdfsCluster', image_json_path='{.spec.image.custom}')
diff --git a/pom.xml b/pom.xml
index a082592..ae1759c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -3,11 +3,11 @@
4.0.0
tech.stackable
- hdfs-group-mapper
+ hdfs-utils
0.1.0-SNAPSHOT
- Apache Hadoop HDFS OpenPolicyAgent Group Mapper
- https://github.com/stackabletech/hdfs-group-mapper/
+ Apache Hadoop HDFS utils
+ https://github.com/stackabletech/hdfs-utils/
Stackable GmbH
https://stackable.tech
@@ -21,11 +21,11 @@
scm:git:${project.scm.url}
scm:git:${project.scm.url}
HEAD
- git@github.com:stackabletech/hdfs-group-mapper.git
+ git@github.com:stackabletech/hdfs-utils.git
GitHub
- https://github.com/stackabletech/hdfs-group-mapper/issues
+ https://github.com/stackabletech/hdfs-utils/issues
@@ -59,6 +59,12 @@
${hadoop.version}
provided
+
+ org.apache.hadoop
+ hadoop-hdfs
+ ${hadoop.version}
+ provided
+
org.slf4j
slf4j-api
diff --git a/rego/hdfs.rego b/rego/hdfs.rego
new file mode 100644
index 0000000..0da9f31
--- /dev/null
+++ b/rego/hdfs.rego
@@ -0,0 +1,212 @@
+package hdfs
+
+import rego.v1
+
+default allow = false
+
+# HDFS authorizer
+allow if {
+ some acl in acls
+ matches_identity(input.callerUgi.shortUserName, acl.identity)
+ matches_resource(input.path, acl.resource)
+ action_sufficient_for_operation(acl.action, input.operationName)
+}
+
+# HDFS group mapper (this returns a list of strings)
+groups := {group |
+ raw = groups_for_user[input.username][_]
+ # Keycloak groups have leading slashes
+ group := trim_prefix(raw, "/")
+}
+
+# Identity mentions the user explicitly
+matches_identity(user, identity) if {
+ identity == concat("", ["user:", user])
+}
+
+# Identity mentions group the user is part of
+matches_identity(user, identity) if {
+ some group in groups_for_user[user]
+ identity == concat("", ["group:", group])
+}
+
+# Resource mentions the file explicitly
+matches_resource(file, resource) if {
+ resource == concat("", ["hdfs:file:", file])
+}
+
+# Resource mentions the directory explicitly
+matches_resource(file, resource) if {
+ trim_suffix(resource, "/") == concat("", ["hdfs:dir:", file])
+}
+
+# Resource mentions a folder higher up the tree, which will grant access recursively
+matches_resource(file, resource) if {
+ startswith(resource, "hdfs:dir:/")
+ # directories need to have a trailing slash
+ endswith(resource, "/")
+ startswith(file, trim_prefix(resource, "hdfs:dir:"))
+}
+
+action_sufficient_for_operation(action, operation) if {
+ action_hierarchy[action][_] == action_for_operation[operation]
+}
+
+action_hierarchy := {
+ "full": ["full", "rw", "ro"],
+ "rw": ["rw", "ro"],
+ "ro": ["ro"],
+}
+
+# To get a (hopefully complete) list of actions run "ack 'String operationName = '" in the hadoop source code
+action_for_operation := {
+ # The "rename" operation will be actually called on both - the source and the target location.
+ # Because of this you need to have rw permissions on the source and target file - which is desired
+
+ "abandonBlock": "rw",
+ "addCacheDirective": "rw",
+ "addCachePool": "full",
+ "addErasureCodingPolicies": "full",
+ "allowSnapshot": "full",
+ "append": "rw",
+ "cancelDelegationToken": "ro",
+ "checkAccess": "ro",
+ "clearQuota": "full",
+ "clearSpaceQuota": "full",
+ "completeFile": "rw",
+ "computeSnapshotDiff": "full",
+ "concat": "rw",
+ "contentSummary": "ro",
+ "create": "rw",
+ "createEncryptionZone": "full",
+ "createSnapshot": "full",
+ "createSymlink": "rw",
+ "delete": "rw",
+ "deleteSnapshot": "full",
+ "disableErasureCodingPolicy": "full",
+ "disallowSnapshot": "full",
+ "enableErasureCodingPolicy": "full",
+ "finalizeRollingUpgrade": "full",
+ "fsck": "full",
+ "fsckGetBlockLocations": "full",
+ "fsync": "rw",
+ "gcDeletedSnapshot": "full",
+ "getAclStatus": "ro",
+ "getAdditionalBlock": "ro",
+ "getAdditionalDatanode": "ro",
+ "getDelegationToken": "ro",
+ "getECTopologyResultForPolicies": "ro",
+ "getErasureCodingCodecs": "ro",
+ "getErasureCodingPolicies": "ro",
+ "getErasureCodingPolicy": "ro",
+ "getEZForPath": "ro",
+ "getfileinfo": "ro",
+ "getPreferredBlockSize": "ro",
+ "getStoragePolicy": "ro",
+ "getXAttrs": "ro",
+ "isFileClosed": "ro",
+ "listCacheDirectives": "ro",
+ "listCachePools": "ro",
+ "listCorruptFileBlocks": "ro",
+ "listEncryptionZones": "ro",
+ "listOpenFiles": "ro",
+ "listReencryptionStatus": "ro",
+ "ListSnapshot": "ro", # Yeah, this really starts with a capital letter
+ "listSnapshottableDirectory": "ro",
+ "listStatus": "ro",
+ "listXAttrs": "ro",
+ "mkdirs": "rw",
+ "modifyAclEntries": "full",
+ "modifyCacheDirective": "rw",
+ "modifyCachePool": "full",
+ "open": "ro",
+ "queryRollingUpgrade": "ro",
+ "quotaUsage": "ro",
+ "recoverLease": "full",
+ "reencryptEncryptionZone": "full",
+ "removeAcl": "full",
+ "removeAclEntries": "full",
+ "removeCacheDirective": "rw",
+ "removeCachePool": "full",
+ "removeDefaultAcl": "full",
+ "removeErasureCodingPolicy": "full",
+ "removeXAttr": "rw",
+ "rename": "rw",
+ "renameSnapshot": "full",
+ "renewDelegationToken": "ro",
+ "satisfyStoragePolicy": "full",
+ "setAcl": "full",
+ "setErasureCodingPolicy": "full",
+ "setOwner": "full",
+ "setPermission": "full",
+ "setQuota": "full",
+ "setReplication": "full",
+ "setSpaceQuota": "full",
+ "setStoragePolicy": "full",
+ "setTimes": "rw",
+ "setXAttr": "rw",
+ "startRollingUpgrade": "full",
+ "truncate": "rw",
+ "unsetErasureCodingPolicy": "full",
+ "unsetStoragePolicy": "full",
+}
+
+# Actions I think are only relevant for the whole filesystem, and not specific to a file or directory
+admin_actions := {
+ "checkRestoreFailedStorage": "ro",
+ "datanodeReport": "ro",
+ "disableRestoreFailedStorage": "full",
+ "enableRestoreFailedStorage": "full",
+ "finalizeUpgrade": "rw",
+ "getDatanodeStorageReport": "ro",
+ "metaSave": "ro",
+ "monitorHealth": "ro",
+ "refreshNodes": "rw",
+ "rollEditLog": "rw",
+ "saveNamespace": "full",
+ "setBalancerBandwidth": "rw",
+ "slowDataNodesReport": "ro",
+ "transitionToActive": "full",
+ "transitionToObserver": "full",
+ "transitionToStandby": "full",
+}
+
+groups_for_user := {"admin": ["admins"], "alice": ["developers"], "bob": []}
+
+acls := [
+ {
+ "identity": "group:admins",
+ "action": "full",
+ "resource": "hdfs:dir:/",
+ },
+ {
+ "identity": "group:developers",
+ "action": "rw",
+ "resource": "hdfs:dir:/developers/",
+ },
+ {
+ "identity": "group:developers",
+ "action": "ro",
+ "resource": "hdfs:dir:/developers-ro/",
+ },
+ {
+ "identity": "user:alice",
+ "action": "rw",
+ "resource": "hdfs:dir:/alice/",
+ },
+ {
+ "identity": "user:bob",
+ "action": "rw",
+ "resource": "hdfs:dir:/bob/",
+ },
+ {
+ "identity": "user:bob",
+ "action": "ro",
+ "resource": "hdfs:dir:/developers/",
+ },
+ {
+ "identity": "user:bob",
+ "action": "rw",
+ "resource": "hdfs:file:/developers/file-from-bob",
+ },
+]
diff --git a/rego/hdfs_test.rego b/rego/hdfs_test.rego
new file mode 100644
index 0000000..d8740df
--- /dev/null
+++ b/rego/hdfs_test.rego
@@ -0,0 +1,140 @@
+package hdfs
+
+import rego.v1
+
+test_admin_access_to_slash if {
+ allow with input as {
+ "callerUgi": {
+ "shortUserName": "admin"
+ },
+ "path": "/top-level",
+ "operationName": "setErasureCodingPolicy",
+ }
+}
+
+test_admin_access_to_alice if {
+ allow with input as {
+ "callerUgi": {
+ "shortUserName": "admin"
+ },
+ "path": "/alice/file",
+ "operationName": "create",
+ }
+}
+
+
+test_admin_access_to_alice_nested_file if {
+ allow with input as {
+ "callerUgi": {
+ "shortUserName": "admin"
+ },
+ "path": "/alice/nested/file",
+ "operationName": "create",
+ }
+}
+
+test_admin_access_to_developers if {
+ allow with input as {
+ "callerUgi": {
+ "shortUserName": "admin"
+ },
+ "path": "/developers/file",
+ "operationName": "create",
+ }
+}
+
+
+
+test_alice_access_to_alice_folder if {
+ allow with input as {
+ "callerUgi": {
+ "shortUserName": "alice"
+ },
+ "path": "/alice",
+ "operationName": "create",
+ }
+}
+
+test_alice_access_to_alice if {
+ allow with input as {
+ "callerUgi": {
+ "shortUserName": "alice"
+ },
+ "path": "/alice/file",
+ "operationName": "create",
+ }
+}
+
+test_alice_no_access_to_bob if {
+ not allow with input as {
+ "callerUgi": {
+ "shortUserName": "alice"
+ },
+ "path": "/bob/file",
+ "operationName": "open",
+ }
+}
+
+test_alice_access_to_developers if {
+ allow with input as {
+ "callerUgi": {
+ "shortUserName": "alice"
+ },
+ "path": "/developers/file",
+ "operationName": "create",
+ }
+}
+
+
+
+
+
+test_bob_no_access_to_alice if {
+ not allow with input as {
+ "callerUgi": {
+ "shortUserName": "bob"
+ },
+ "path": "/alice/file",
+ "operationName": "open",
+ }
+}
+
+test_bob_access_to_bob if {
+ allow with input as {
+ "callerUgi": {
+ "shortUserName": "bob"
+ },
+ "path": "/bob/file",
+ "operationName": "create",
+ }
+}
+
+test_bob_ro_access_to_developers if {
+ allow with input as {
+ "callerUgi": {
+ "shortUserName": "bob"
+ },
+ "path": "/developers/file",
+ "operationName": "open",
+ }
+}
+
+test_bob_no_rw_access_to_developers if {
+ not allow with input as {
+ "callerUgi": {
+ "shortUserName": "bob"
+ },
+ "path": "/developers/file",
+ "operationName": "create",
+ }
+}
+
+test_bob_rw_access_to_developers_special_file if {
+ allow with input as {
+ "callerUgi": {
+ "shortUserName": "bob"
+ },
+ "path": "/developers/file-from-bob",
+ "operationName": "create",
+ }
+}
diff --git a/src/main/java/tech/stackable/hadoop/HadoopConfigSingleton.java b/src/main/java/tech/stackable/hadoop/HadoopConfigSingleton.java
new file mode 100644
index 0000000..5419681
--- /dev/null
+++ b/src/main/java/tech/stackable/hadoop/HadoopConfigSingleton.java
@@ -0,0 +1,12 @@
+package tech.stackable.hadoop;
+
+import org.apache.hadoop.conf.Configuration;
+
+public enum HadoopConfigSingleton {
+ INSTANCE;
+ private final Configuration configuration = new Configuration();
+
+ public Configuration getConfiguration() {
+ return this.configuration;
+ }
+}
diff --git a/src/main/java/tech/stackable/hadoop/OpaAllowQuery.java b/src/main/java/tech/stackable/hadoop/OpaAllowQuery.java
new file mode 100644
index 0000000..1c6737c
--- /dev/null
+++ b/src/main/java/tech/stackable/hadoop/OpaAllowQuery.java
@@ -0,0 +1,51 @@
+package tech.stackable.hadoop;
+
+import org.apache.hadoop.hdfs.server.namenode.INodeAttributeProvider;
+
+public class OpaAllowQuery {
+ public final OpaAllowQueryInput input;
+
+ public OpaAllowQuery(OpaAllowQueryInput input) {
+ this.input = input;
+ }
+
+ public static class OpaAllowQueryInput {
+ public java.lang.String fsOwner;
+ public java.lang.String supergroup;
+ public OpaQueryUgi callerUgi;
+ public org.apache.hadoop.hdfs.server.namenode.INodeAttributes[] inodeAttrs;
+ public org.apache.hadoop.hdfs.server.namenode.INode[] inodes;
+ public byte[][] pathByNameArr;
+ public int snapshotId;
+ public java.lang.String path;
+ public int ancestorIndex;
+ public boolean doCheckOwner;
+ public org.apache.hadoop.fs.permission.FsAction ancestorAccess;
+ public org.apache.hadoop.fs.permission.FsAction parentAccess;
+ public org.apache.hadoop.fs.permission.FsAction access;
+ public org.apache.hadoop.fs.permission.FsAction subAccess;
+ public boolean ignoreEmptyDir;
+ public java.lang.String operationName;
+ public org.apache.hadoop.ipc.CallerContext callerContext;
+
+ public OpaAllowQueryInput(INodeAttributeProvider.AuthorizationContext context) {
+ this.fsOwner = context.getFsOwner();
+ this.supergroup = context.getSupergroup();
+ this.callerUgi = new OpaQueryUgi(context.getCallerUgi());
+ this.inodeAttrs = context.getInodeAttrs();
+ this.inodes = context.getInodes();
+ this.pathByNameArr = context.getPathByNameArr();
+ this.snapshotId = context.getSnapshotId();
+ this.path = context.getPath();
+ this.ancestorIndex = context.getAncestorIndex();
+ this.doCheckOwner = context.isDoCheckOwner();
+ this.ancestorAccess = context.getAncestorAccess();
+ this.parentAccess = context.getParentAccess();
+ this.access = context.getAccess();
+ this.subAccess = context.getSubAccess();
+ this.ignoreEmptyDir = context.isIgnoreEmptyDir();
+ this.operationName = context.getOperationName();
+ this.callerContext = context.getCallerContext();
+ }
+ }
+}
diff --git a/src/main/java/tech/stackable/hadoop/OpaException.java b/src/main/java/tech/stackable/hadoop/OpaException.java
index 0aaacc8..01f2f08 100644
--- a/src/main/java/tech/stackable/hadoop/OpaException.java
+++ b/src/main/java/tech/stackable/hadoop/OpaException.java
@@ -11,6 +11,15 @@ protected OpaException(String message, Throwable cause) {
super(message, cause);
}
+ public static final class UriMissing extends OpaException {
+ public UriMissing(String configuration) {
+ super("No Open Policy Agent URI provided (must be set in the configuration \""
+ + configuration
+ + "\")",
+ null);
+ }
+ }
+
public static final class UriInvalid extends OpaException {
public UriInvalid(URI uri, Throwable cause) {
super(
@@ -32,6 +41,11 @@ public EndPointNotFound(String url) {
null);
}
}
+ public static final class QueryFailed extends OpaException {
+ public QueryFailed(Throwable cause) {
+ super("Failed to query OPA backend", cause);
+ }
+ }
public static final class OpaServerError extends OpaException {
public OpaServerError(String query, HttpResponse response) {
@@ -45,4 +59,16 @@ public OpaServerError(String query, HttpResponse response) {
null);
}
}
+
+ public static final class SerializeFailed extends OpaException {
+ public SerializeFailed(Throwable cause) {
+ super("Failed to serialize OPA query context", cause);
+ }
+ }
+
+ public static final class DeserializeFailed extends OpaException {
+ public DeserializeFailed(Throwable cause) {
+ super("Failed to deserialize OPA policy response", cause);
+ }
+ }
}
diff --git a/src/main/java/tech/stackable/hadoop/OpaQuery.java b/src/main/java/tech/stackable/hadoop/OpaGroupsQuery.java
similarity index 50%
rename from src/main/java/tech/stackable/hadoop/OpaQuery.java
rename to src/main/java/tech/stackable/hadoop/OpaGroupsQuery.java
index a65472b..506973a 100644
--- a/src/main/java/tech/stackable/hadoop/OpaQuery.java
+++ b/src/main/java/tech/stackable/hadoop/OpaGroupsQuery.java
@@ -2,30 +2,30 @@
import java.util.StringJoiner;
-public class OpaQuery {
- public final OpaQueryInput input;
+public class OpaGroupsQuery {
+ public final OpaGroupsQueryInput input;
- public OpaQuery(OpaQueryInput input) {
+ public OpaGroupsQuery(OpaGroupsQueryInput input) {
this.input = input;
}
@Override
public String toString() {
- return new StringJoiner(", ", OpaQuery.class.getSimpleName() + "[", "]")
+ return new StringJoiner(", ", OpaGroupsQuery.class.getSimpleName() + "[", "]")
.add("input=" + input)
.toString();
}
- public static class OpaQueryInput {
+ public static class OpaGroupsQueryInput {
public final String username;
- public OpaQueryInput(String user) {
+ public OpaGroupsQueryInput(String user) {
this.username = user;
}
@Override
public String toString() {
- return new StringJoiner(", ", OpaQueryInput.class.getSimpleName() + "[", "]")
+ return new StringJoiner(", ", OpaGroupsQueryInput.class.getSimpleName() + "[", "]")
.add("username='" + username + "'")
.toString();
}
diff --git a/src/main/java/tech/stackable/hadoop/OpaQueryUgi.java b/src/main/java/tech/stackable/hadoop/OpaQueryUgi.java
new file mode 100644
index 0000000..a3e5c3b
--- /dev/null
+++ b/src/main/java/tech/stackable/hadoop/OpaQueryUgi.java
@@ -0,0 +1,32 @@
+package tech.stackable.hadoop;
+
+import org.apache.hadoop.security.UserGroupInformation;
+
+import java.io.IOException;
+import java.util.List;
+
+public class OpaQueryUgi {
+ public UserGroupInformation realUser;
+ public String userName;
+ public String shortUserName;
+
+ public String primaryGroup;
+  public List<String> groups;
+
+ public UserGroupInformation.AuthenticationMethod authenticationMethod;
+ public UserGroupInformation.AuthenticationMethod realAuthenticationMethod;
+
+ public OpaQueryUgi(UserGroupInformation ugi) {
+ this.realUser = ugi.getRealUser();
+ this.userName = ugi.getUserName();
+ this.shortUserName = ugi.getShortUserName();
+ try {
+ this.primaryGroup = ugi.getPrimaryGroupName();
+ } catch (IOException e) {
+ this.primaryGroup = null;
+ }
+ this.groups = ugi.getGroups();
+ this.authenticationMethod = ugi.getAuthenticationMethod();
+ this.realAuthenticationMethod = ugi.getRealAuthenticationMethod();
+ }
+}
diff --git a/src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java b/src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java
new file mode 100644
index 0000000..acad300
--- /dev/null
+++ b/src/main/java/tech/stackable/hadoop/StackableAccessControlEnforcer.java
@@ -0,0 +1,171 @@
+package tech.stackable.hadoop;
+
+import com.fasterxml.jackson.annotation.JsonAutoDetect;
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.PropertyAccessor;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.DeserializationFeature;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.SerializationFeature;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.permission.FsAction;
+import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
+import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
+import org.apache.hadoop.hdfs.server.namenode.INode;
+import org.apache.hadoop.hdfs.server.namenode.INodeAttributeProvider;
+import org.apache.hadoop.hdfs.server.namenode.INodeAttributes;
+import org.apache.hadoop.security.AccessControlException;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.net.URI;
+import java.net.http.HttpClient;
+import java.net.http.HttpRequest;
+import java.net.http.HttpResponse;
+import java.util.Objects;
+
+// As of 2024-02-09 INodeAttributeProvider.AccessControlEnforcer has two functions: The old - deprecated -
+// checkPermission and the new checkPermissionWithContext. HDFS uses reflection to check if the authorizer
+// supports the new API (which we do) and uses that in this case. This is also indicated by the log statement
+// "Use the new authorization provider API" during startup, see https://github.com/apache/hadoop/blob/50d256ef3c2531563bc6ba96dec6b78e154b4697/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java#L245
+// FSPermissionChecker (as a caller of the AccessControlEnforcer interface) has a ThreadLocal operationType, which
+// needs to be set to e.g. "create", "delete" or "rename" prior to calling the FSPermissionChecker.checkPermission
+// function, as it will actually check if operationType is null and will still use the old API in this case! But the old
+// API does not have the information about the operationType, which makes it hard or even impossible to authorize the request.
+// As a consequence we only support the new API and will make sure no HDFS code path calls the old API. This required
+// minor patches to HDFS, as it was e.g. missing a call to FSPermissionChecker.setOperationType("create") in
+// FSNamesystem.startFileInt (this claim needs to be validated though).
+public class StackableAccessControlEnforcer implements INodeAttributeProvider.AccessControlEnforcer {
+
+ private static final Logger LOG = LoggerFactory.getLogger(StackableAccessControlEnforcer.class);
+
+ public static final String OPA_POLICY_URL_PROP = "hadoop.security.authorization.opa.policy.url";
+
+ private final HttpClient httpClient = HttpClient.newHttpClient();
+ private final ObjectMapper json;
+ private URI opaUri;
+
+ public StackableAccessControlEnforcer() {
+ LOG.debug("Starting StackableAccessControlEnforcer");
+
+ // Guaranteed to be only called once (Effective Java: Item 3)
+ Configuration configuration = HadoopConfigSingleton.INSTANCE.getConfiguration();
+
+ String opaPolicyUrl = configuration.get(OPA_POLICY_URL_PROP);
+ if (opaPolicyUrl == null) {
+ throw new OpaException.UriMissing(OPA_POLICY_URL_PROP);
+ }
+
+ try {
+ this.opaUri = URI.create(opaPolicyUrl);
+ } catch (Exception e) {
+ throw new OpaException.UriInvalid(opaUri, e);
+ }
+
+ this.json = new ObjectMapper()
+        // OPA server can send other fields, such as `decision_id` when enabling decision logs
+        // We could add all the fields we *currently* know, but it's more future-proof to ignore any unknown fields
+ .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
+ // Previously we were getting
+ // Caused by: com.fasterxml.jackson.databind.exc.InvalidDefinitionException: No serializer found for class org.apache.hadoop.hdfs.util.EnumCounters and no properties discovered to create BeanSerializer (to avoid exception, disable SerializationFeature.FAIL_ON_EMPTY_BEANS) (through reference chain: tech.stackable.HdfsOpaAccessControlEnforcer$ContextWrapper["inodeAttrs"]->org.apache.hadoop.hdfs.server.namenode.INodeDirectory[0]->org.apache.hadoop.hdfs.server.namenode.INodeDirectory["features"]->org.apache.hadoop.hdfs.server.namenode.DirectoryWithQuotaFeature[0]->org.apache.hadoop.hdfs.server.namenode.DirectoryWithQuotaFeature["spaceConsumed"]->org.apache.hadoop.hdfs.server.namenode.QuotaCounts["typeSpaces"])
+ .configure(SerializationFeature.FAIL_ON_EMPTY_BEANS, false)
+        // Only include the needed fields. HDFS has many classes with even more circular references to remove
+ .setVisibility(PropertyAccessor.ALL, JsonAutoDetect.Visibility.NONE)
+ .setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.PUBLIC_ONLY)
+ .setVisibility(PropertyAccessor.GETTER, JsonAutoDetect.Visibility.PUBLIC_ONLY)
+ // We need to remove some circular pointers (e.g. root -> children[0] -> parent -> root)
+ // Otherwise we get com.fasterxml.jackson.databind.JsonMappingException: Infinite recursion (StackOverflowError)
+ .addMixIn(DatanodeDescriptor.class, DatanodeDescriptorMixin.class);
+
+ LOG.debug("Started HdfsOpaAccessControlEnforcer");
+ }
+
+ private static class OpaQueryResult {
+ // Boxed Boolean to detect not-present vs explicitly false
+ public Boolean result;
+ }
+
+ @Override
+ public void checkPermission(String fsOwner, String supergroup,
+ UserGroupInformation ugi, INodeAttributes[] inodeAttrs,
+ INode[] inodes, byte[][] pathByNameArr, int snapshotId, String path,
+ int ancestorIndex, boolean doCheckOwner, FsAction ancestorAccess,
+ FsAction parentAccess, FsAction access, FsAction subAccess,
+ boolean ignoreEmptyDir) throws AccessControlException {
+ LOG.warn("checkPermission called");
+
+ new Throwable().printStackTrace();
+ throw new AccessControlException("The HdfsOpaAccessControlEnforcer does not implement the old checkPermission API. " +
+ "This should not happen, as all HDFS code paths should call the new API. " +
+ "I dumped the stack trace for you (check active namenode logs), so you can figure out which code path it was. " +
+        "Please report all of that to the author of the OPA authorizer (we don't have a stable GitHub link yet, sorry!) " +
+ "Passed arguments: " +
+ "fsOwner: " + fsOwner + ", supergroup: " + supergroup + ", ugi: " + ugi + ", path: " + path + ", ancestorIndex:" + ancestorIndex +
+ ", doCheckOwner: " + doCheckOwner + ", ancestorAccess: " + ancestorAccess + ", parentAccess: " + parentAccess +
+ ", subAccess: " + subAccess + ", ignoreEmptyDir: " + ignoreEmptyDir);
+ }
+
+ @Override
+ public void checkPermissionWithContext(INodeAttributeProvider.AuthorizationContext authzContext) throws AccessControlException {
+ OpaAllowQuery query = new OpaAllowQuery(new OpaAllowQuery.OpaAllowQueryInput(authzContext));
+
+ String body;
+ try {
+ body = json.writeValueAsString(query);
+ } catch (JsonProcessingException e) {
+ throw new OpaException.SerializeFailed(e);
+ }
+
+ String prettyPrinted;
+ try {
+ prettyPrinted = json.writerWithDefaultPrettyPrinter().writeValueAsString(query);
+ } catch (JsonProcessingException e) {
+ LOG.error("Could not pretty print the following request body (but non-pretty print did work): {}", body);
+ throw new OpaException.SerializeFailed(e);
+ }
+
+ LOG.debug("Request body:\n{}", prettyPrinted);
+    HttpResponse<String> response = null;
+ try {
+ response =
+ httpClient.send(
+ HttpRequest.newBuilder(opaUri)
+ .header("Content-Type", "application/json")
+ .POST(HttpRequest.BodyPublishers.ofString(body))
+ .build(),
+ HttpResponse.BodyHandlers.ofString());
+ LOG.debug("Opa response: {}", response.body());
+ } catch (Exception e) {
+ LOG.error(e.getMessage());
+ throw new OpaException.QueryFailed(e);
+ }
+
+ switch (Objects.requireNonNull(response).statusCode()) {
+ case 200:
+ break;
+ case 404:
+ throw new OpaException.EndPointNotFound(opaUri.toString());
+ default:
+ throw new OpaException.OpaServerError(query.toString(), response);
+ }
+
+ OpaQueryResult result;
+ try {
+ result = json.readValue(response.body(), OpaQueryResult.class);
+ } catch (JsonProcessingException e) {
+ throw new OpaException.DeserializeFailed(e);
+ }
+
+ if (result.result == null || !result.result) {
+ throw new AccessControlException("OPA denied the request");
+ }
+ }
+
+ private abstract static class DatanodeDescriptorMixin {
+ @JsonIgnore
+ abstract INode getParent();
+ @JsonIgnore
+ abstract DatanodeStorageInfo[] getStorageInfos();
+ }
+}
diff --git a/src/main/java/tech/stackable/hadoop/StackableAuthorizer.java b/src/main/java/tech/stackable/hadoop/StackableAuthorizer.java
new file mode 100644
index 0000000..8f63e7d
--- /dev/null
+++ b/src/main/java/tech/stackable/hadoop/StackableAuthorizer.java
@@ -0,0 +1,32 @@
+package tech.stackable.hadoop;
+
+import org.apache.hadoop.hdfs.server.namenode.INodeAttributeProvider;
+import org.apache.hadoop.hdfs.server.namenode.INodeAttributes;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class StackableAuthorizer extends INodeAttributeProvider {
+
+ private static final Logger LOG = LoggerFactory.getLogger(StackableAuthorizer.class);
+
+ @Override
+ public void start() {
+ LOG.debug("Starting HdfsOpaAuthorizer");
+ }
+
+ @Override
+ public void stop() {
+ LOG.debug("Stopping HdfsOpaAuthorizer");
+ }
+
+ @Override
+ public INodeAttributes getAttributes(String[] strings, INodeAttributes iNodeAttributes) {
+ // No special attributes needed
+ return iNodeAttributes;
+ }
+
+ @Override
+ public AccessControlEnforcer getExternalAccessControlEnforcer(AccessControlEnforcer defaultEnforcer) {
+ return new StackableAccessControlEnforcer();
+ }
+}
diff --git a/src/main/java/tech/stackable/hadoop/StackableGroupMapper.java b/src/main/java/tech/stackable/hadoop/StackableGroupMapper.java
index 00a0b7e..b1ee34a 100644
--- a/src/main/java/tech/stackable/hadoop/StackableGroupMapper.java
+++ b/src/main/java/tech/stackable/hadoop/StackableGroupMapper.java
@@ -1,9 +1,9 @@
package tech.stackable.hadoop;
import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
-import java.io.IOException;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
@@ -12,6 +12,8 @@
import java.util.List;
import java.util.Map;
import java.util.Objects;
+
+import com.fasterxml.jackson.databind.type.TypeFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.GroupMappingServiceProvider;
import org.slf4j.Logger;
@@ -20,33 +22,22 @@
public class StackableGroupMapper implements GroupMappingServiceProvider {
private static final Logger LOG = LoggerFactory.getLogger(StackableGroupMapper.class);
- public static final String OPA_MAPPING_URL_PROP = "hadoop.security.group.mapping.opa.url";
- private static final String OPA_MAPPING_GROUP_NAME_PROP =
- "hadoop.security.group.mapping.opa.list.name";
+
+ public static final String OPA_MAPPING_URL_PROP = "hadoop.security.group.mapping.opa.policy.url";
// response base field: see https://www.openpolicyagent.org/docs/latest/rest-api/#response-message
private static final String OPA_RESULT_FIELD = "result";
- private final String mappingGroupName;
private final HttpClient httpClient = HttpClient.newHttpClient();
private final ObjectMapper json;
private URI opaUri;
- public enum HadoopConfig {
- INSTANCE;
- private final Configuration configuration = new Configuration();
-
- public Configuration getConfiguration() {
- return this.configuration;
- }
- }
-
public StackableGroupMapper() {
- // guaranteed to be only called once (Effective Java: Item 3)
- Configuration configuration = HadoopConfig.INSTANCE.getConfiguration();
+ // Guaranteed to be only called once (Effective Java: Item 3)
+ Configuration configuration = HadoopConfigSingleton.INSTANCE.getConfiguration();
String opaMappingUrl = configuration.get(OPA_MAPPING_URL_PROP);
if (opaMappingUrl == null) {
- throw new RuntimeException("Config \"" + OPA_MAPPING_URL_PROP + "\" missing");
+ throw new OpaException.UriMissing(OPA_MAPPING_URL_PROP);
}
try {
@@ -55,13 +46,7 @@ public StackableGroupMapper() {
throw new OpaException.UriInvalid(opaUri, e);
}
- this.mappingGroupName = configuration.get(OPA_MAPPING_GROUP_NAME_PROP);
- if (mappingGroupName == null) {
- throw new RuntimeException("Config \"" + OPA_MAPPING_GROUP_NAME_PROP + "\" missing");
- }
-
- LOG.info("OPA mapping URL [{}]", opaMappingUrl);
- LOG.info("OPA mapping group [{}]", mappingGroupName);
+ LOG.debug("OPA mapping URL: {}", opaMappingUrl);
this.json =
new ObjectMapper()
@@ -70,10 +55,14 @@ public StackableGroupMapper() {
// We could add all the fields we *currently* know, but it's more future-proof to ignore
// any unknown fields.
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
- // do not include null values
+ // Do not include null values
.setSerializationInclusion(JsonInclude.Include.NON_NULL);
}
+ private static class OpaQueryResult {
+ public List result;
+ }
+
/**
* Returns list of groups for a user.
*
@@ -81,13 +70,19 @@ public StackableGroupMapper() {
* @return list of groups for a given user
*/
@Override
- public List getGroups(String user) throws IOException {
- LOG.info("Calling StackableGroupMapper.getGroups for user [{}]", user);
+ public List getGroups(String user) {
+ LOG.info("Calling StackableGroupMapper.getGroups for user \"{}\"", user);
+
+ OpaGroupsQuery query = new OpaGroupsQuery(new OpaGroupsQuery.OpaGroupsQueryInput(user));
- OpaQuery query = new OpaQuery(new OpaQuery.OpaQueryInput(user));
- String body = json.writeValueAsString(query);
+ String body;
+ try {
+ body = json.writeValueAsString(query);
+ } catch (JsonProcessingException e) {
+ throw new OpaException.SerializeFailed(e);
+ }
- LOG.debug("Request body [{}]", body);
+ LOG.debug("Request body: {}", body);
HttpResponse response = null;
try {
response =
@@ -97,9 +92,10 @@ public List getGroups(String user) throws IOException {
.POST(HttpRequest.BodyPublishers.ofString(body))
.build(),
HttpResponse.BodyHandlers.ofString());
- LOG.info("Opa response [{}]", response.body());
- } catch (InterruptedException e) {
+ LOG.debug("Opa response: {}", response.body());
+ } catch (Exception e) {
LOG.error(e.getMessage());
+ throw new OpaException.QueryFailed(e);
}
switch (Objects.requireNonNull(response).statusCode()) {
@@ -111,15 +107,15 @@ public List getGroups(String user) throws IOException {
throw new OpaException.OpaServerError(query.toString(), response);
}
- String responseBody = response.body();
- LOG.debug("Response body [{}]", responseBody);
-
- @SuppressWarnings("unchecked")
- Map result =
- (Map) json.readValue(responseBody, HashMap.class).get(OPA_RESULT_FIELD);
- List groups = (List) result.get(this.mappingGroupName);
+ OpaQueryResult result;
+ try {
+ result = json.readValue(response.body(), OpaQueryResult.class);
+ } catch (JsonProcessingException e) {
+ throw new OpaException.DeserializeFailed(e);
+ }
+ List groups = result.result;
- LOG.info("Groups for [{}]: [{}]", user, groups);
+ LOG.debug("Groups for \"{}\": {}", user, groups);
return groups;
}
@@ -128,7 +124,7 @@ public List getGroups(String user) throws IOException {
@Override
public void cacheGroupsRefresh() {
// does nothing in this provider of user to groups mapping
- LOG.info("ignoring cacheGroupsRefresh: caching should be provided by the policy provider");
+ LOG.debug("ignoring cacheGroupsRefresh: caching should be provided by the policy provider");
}
/**
@@ -139,7 +135,7 @@ public void cacheGroupsRefresh() {
@Override
public void cacheGroupsAdd(List groups) {
// does nothing in this provider of user to groups mapping
- LOG.info(
+ LOG.debug(
"ignoring cacheGroupsAdd for groups [{}]: caching should be provided by the policy provider",
groups);
}
diff --git a/test/stack/05-opa.yaml b/test/stack/05-opa.yaml
deleted file mode 100644
index f4f4315..0000000
--- a/test/stack/05-opa.yaml
+++ /dev/null
@@ -1,64 +0,0 @@
----
-apiVersion: v1
-kind: ConfigMap
-metadata:
- name: test
- labels:
- opa.stackable.tech/bundle: "hdfs-group-mapping"
-data:
- test.rego: |
- package hdfsgroups
-
- # this will return the group data in this form:
- # "result": {
- # "groups": [
- # "admin",
- # "superuser"
- # ]
- # ...
- groups := {g |
- raw = users_by_name[input.username].groups[_]
- g := trim(raw, "/")
- }
-
- # returning data in the form presented by the UIF
- users_by_name := {
- "alice": {
- "id": "af07f12c-1234-40a7-93e0-874537bdf3f5",
- "username": "alice",
- "groups": ["/superset-admin"],
- "customAttributes": {},
- },
- "bob": {
- "id": "af07f12c-2345-40a7-93e0-874537bdf3f5",
- "username": "bob",
- "groups": ["/admin"],
- "customAttributes": {},
- },
- "stackable": {
- "id": "af07f12c-3456-40a7-93e0-874537bdf3f5",
- "username": "stackable",
- "groups": ["/admin", "/superuser"],
- "customAttributes": {},
- },
- # Hadoop will use the short-name for group mappings
- "nn": {
- "id": "af07f12c-7890-40a7-93e0-874537bdf3f5",
- "username": "nn",
- "groups": ["/admin", "/superuser"],
- "customAttributes": {},
- },
- }
-
----
-apiVersion: opa.stackable.tech/v1alpha1
-kind: OpaCluster
-metadata:
- name: test-opa
-spec:
- image:
- productVersion: 0.57.0
- pullPolicy: IfNotPresent
- servers:
- roleGroups:
- default: {}
diff --git a/test/stack/10-hdfs.yaml b/test/stack/10-hdfs.yaml
deleted file mode 100644
index a03961c..0000000
--- a/test/stack/10-hdfs.yaml
+++ /dev/null
@@ -1,60 +0,0 @@
----
-apiVersion: zookeeper.stackable.tech/v1alpha1
-kind: ZookeeperCluster
-metadata:
- name: simple-zk
-spec:
- image:
- productVersion: 3.8.3
- servers:
- roleGroups:
- default:
- replicas: 1
----
-apiVersion: zookeeper.stackable.tech/v1alpha1
-kind: ZookeeperZnode
-metadata:
- name: simple-hdfs-znode
-spec:
- clusterRef:
- name: simple-zk
----
-apiVersion: hdfs.stackable.tech/v1alpha1
-kind: HdfsCluster
-metadata:
- name: simple-hdfs
-spec:
- image:
- productVersion: 3.3.6
- custom: hdfs
- clusterConfig:
- dfsReplication: 1
- zookeeperConfigMapName: simple-hdfs-znode
- authentication:
- tlsSecretClass: tls
- kerberos:
- secretClass: kerberos-default
- nameNodes:
- roleGroups:
- default:
- envOverrides:
- HADOOP_CLASSPATH: "/stackable/hadoop/share/hadoop/tools/lib/*.jar"
- configOverrides:
- core-site.xml:
- # the mapper is only handled on the namenode so no need to apply this config to all roles
- hadoop.security.group.mapping: "tech.stackable.hadoop.StackableGroupMapper"
- hadoop.security.group.mapping.opa.url: "http://test-opa.default.svc.cluster.local:8081/v1/data/hdfsgroups"
- hadoop.security.group.mapping.opa.list.name: "groups"
- # The operator adds a default static mapping when kerberos is activated, see:
- # https://github.com/stackabletech/hdfs-operator/blob/main/rust/operator-binary/src/kerberos.rs#L97-L101
- # This should be removed so that the mapping implementation can provide this information instead:
- hadoop.user.group.static.mapping.overrides: ""
- replicas: 2
- dataNodes:
- roleGroups:
- default:
- replicas: 1
- journalNodes:
- roleGroups:
- default:
- replicas: 1
diff --git a/test/stack/10-opa.yaml b/test/stack/10-opa.yaml
new file mode 100644
index 0000000..9d3635f
--- /dev/null
+++ b/test/stack/10-opa.yaml
@@ -0,0 +1,12 @@
+---
+apiVersion: opa.stackable.tech/v1alpha1
+kind: OpaCluster
+metadata:
+ name: opa
+spec:
+ image:
+ productVersion: 0.61.0 # Needed for OPA rego v1
+ pullPolicy: IfNotPresent
+ servers:
+ roleGroups:
+ default: {}
diff --git a/test/stack/11-rego-rules.yaml b/test/stack/11-rego-rules.yaml
new file mode 100644
index 0000000..c8ed269
--- /dev/null
+++ b/test/stack/11-rego-rules.yaml
@@ -0,0 +1,221 @@
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: hdfs-regorules
+ labels:
+ opa.stackable.tech/bundle: "true"
+data:
+ hdfs.rego: |
+ package hdfs
+
+ import rego.v1
+
+ default allow = false
+
+ # HDFS authorizer
+ allow if {
+ some acl in acls
+ matches_identity(input.callerUgi.shortUserName, acl.identity)
+ matches_resource(input.path, acl.resource)
+ action_sufficient_for_operation(acl.action, input.operationName)
+ }
+
+ # HDFS group mapper (this returns a list of strings)
+ groups := {group |
+ raw = groups_for_user[input.username][_]
+      # Keycloak groups have leading slashes
+ group := trim_prefix(raw, "/")
+ }
+
+ # Identity mentions the user explicitly
+ matches_identity(user, identity) if {
+ identity == concat("", ["user:", user])
+ }
+
+ # Identity mentions group the user is part of
+ matches_identity(user, identity) if {
+ some group in groups_for_user[user]
+ identity == concat("", ["group:", group])
+ }
+
+ # Resource mentions the file explicitly
+ matches_resource(file, resource) if {
+ resource == concat("", ["hdfs:file:", file])
+ }
+
+ # Resource mentions the directory explicitly
+ matches_resource(file, resource) if {
+ trim_suffix(resource, "/") == concat("", ["hdfs:dir:", file])
+ }
+
+    # Resource mentions a folder higher up the tree, which will grant access recursively
+ matches_resource(file, resource) if {
+ startswith(resource, "hdfs:dir:/")
+ # directories need to have a trailing slash
+ endswith(resource, "/")
+ startswith(file, trim_prefix(resource, "hdfs:dir:"))
+ }
+
+ action_sufficient_for_operation(action, operation) if {
+ action_hierarchy[action][_] == action_for_operation[operation]
+ }
+
+ action_hierarchy := {
+ "full": ["full", "rw", "ro"],
+ "rw": ["rw", "ro"],
+ "ro": ["ro"],
+ }
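+
+    # Example: an ACL with action "rw" is sufficient for "open" (ro) and "create" (rw),
+    # but not for "setOwner" (full), per the mapping below.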
+
+    # To get a (hopefully complete) list of actions, run "ack 'String operationName = '" in the Hadoop source code
+ action_for_operation := {
+        # The "rename" operation is checked on both the source and the target location.
+        # Because of this you need rw permissions on both the source and the target file, which is desired.
+
+ "abandonBlock": "rw",
+ "addCacheDirective": "rw",
+ "addCachePool": "full",
+ "addErasureCodingPolicies": "full",
+ "allowSnapshot": "full",
+ "append": "rw",
+ "cancelDelegationToken": "ro",
+ "checkAccess": "ro",
+ "clearQuota": "full",
+ "clearSpaceQuota": "full",
+ "completeFile": "rw",
+ "computeSnapshotDiff": "full",
+ "concat": "rw",
+ "contentSummary": "ro",
+ "create": "rw",
+ "createEncryptionZone": "full",
+ "createSnapshot": "full",
+ "createSymlink": "rw",
+ "delete": "rw",
+ "deleteSnapshot": "full",
+ "disableErasureCodingPolicy": "full",
+ "disallowSnapshot": "full",
+ "enableErasureCodingPolicy": "full",
+ "finalizeRollingUpgrade": "full",
+ "fsck": "full",
+ "fsckGetBlockLocations": "full",
+ "fsync": "rw",
+ "gcDeletedSnapshot": "full",
+ "getAclStatus": "ro",
+ "getAdditionalBlock": "ro",
+ "getAdditionalDatanode": "ro",
+ "getDelegationToken": "ro",
+ "getECTopologyResultForPolicies": "ro",
+ "getErasureCodingCodecs": "ro",
+ "getErasureCodingPolicies": "ro",
+ "getErasureCodingPolicy": "ro",
+ "getEZForPath": "ro",
+ "getfileinfo": "ro",
+ "getPreferredBlockSize": "ro",
+ "getStoragePolicy": "ro",
+ "getXAttrs": "ro",
+ "isFileClosed": "ro",
+ "listCacheDirectives": "ro",
+ "listCachePools": "ro",
+ "listCorruptFileBlocks": "ro",
+ "listEncryptionZones": "ro",
+ "listOpenFiles": "ro",
+ "listReencryptionStatus": "ro",
+ "ListSnapshot": "ro", # Yeah, this really starts with a capital letter
+ "listSnapshottableDirectory": "ro",
+ "listStatus": "ro",
+ "listXAttrs": "ro",
+ "mkdirs": "rw",
+ "modifyAclEntries": "full",
+ "modifyCacheDirective": "rw",
+ "modifyCachePool": "full",
+ "open": "ro",
+ "queryRollingUpgrade": "ro",
+ "quotaUsage": "ro",
+ "recoverLease": "full",
+ "reencryptEncryptionZone": "full",
+ "removeAcl": "full",
+ "removeAclEntries": "full",
+ "removeCacheDirective": "rw",
+ "removeCachePool": "full",
+ "removeDefaultAcl": "full",
+ "removeErasureCodingPolicy": "full",
+ "removeXAttr": "rw",
+ "rename": "rw",
+ "renameSnapshot": "full",
+ "renewDelegationToken": "ro",
+ "satisfyStoragePolicy": "full",
+ "setAcl": "full",
+ "setErasureCodingPolicy": "full",
+ "setOwner": "full",
+ "setPermission": "full",
+ "setQuota": "full",
+ "setReplication": "full",
+ "setSpaceQuota": "full",
+ "setStoragePolicy": "full",
+ "setTimes": "rw",
+ "setXAttr": "rw",
+ "startRollingUpgrade": "full",
+ "truncate": "rw",
+ "unsetErasureCodingPolicy": "full",
+ "unsetStoragePolicy": "full",
+ }
+
+    # Actions that presumably apply to the whole filesystem rather than to a specific file or directory
+ admin_actions := {
+ "checkRestoreFailedStorage": "ro",
+ "datanodeReport": "ro",
+ "disableRestoreFailedStorage": "full",
+ "enableRestoreFailedStorage": "full",
+ "finalizeUpgrade": "rw",
+ "getDatanodeStorageReport": "ro",
+ "metaSave": "ro",
+ "monitorHealth": "ro",
+ "refreshNodes": "rw",
+ "rollEditLog": "rw",
+ "saveNamespace": "full",
+ "setBalancerBandwidth": "rw",
+ "slowDataNodesReport": "ro",
+ "transitionToActive": "full",
+ "transitionToObserver": "full",
+ "transitionToStandby": "full",
+ }
+
+ groups_for_user := {"admin": ["admins"], "alice": ["developers"], "bob": []}
+
+ acls := [
+ {
+ "identity": "group:admins",
+ "action": "full",
+ "resource": "hdfs:dir:/",
+ },
+ {
+ "identity": "group:developers",
+ "action": "rw",
+ "resource": "hdfs:dir:/developers/",
+ },
+ {
+ "identity": "group:developers",
+ "action": "ro",
+ "resource": "hdfs:dir:/developers-ro/",
+ },
+ {
+ "identity": "user:alice",
+ "action": "rw",
+ "resource": "hdfs:dir:/alice/",
+ },
+ {
+ "identity": "user:bob",
+ "action": "rw",
+ "resource": "hdfs:dir:/bob/",
+ },
+ {
+ "identity": "user:bob",
+ "action": "ro",
+ "resource": "hdfs:dir:/developers/",
+ },
+ {
+ "identity": "user:bob",
+ "action": "rw",
+ "resource": "hdfs:file:/developers/file-from-bob",
+ },
+ ]
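+
+    # A few consequences of the ACLs above (exercised by the test Job in 30-test-hdfs-permissions.yaml):
+    #   - admin gets full access everywhere via the "admins" group
+    #   - alice can read and write below /alice/ and /developers/ and read below /developers-ro/
+    #   - bob can read and write below /bob/, read /developers/, and write only /developers/file-from-bob there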
diff --git a/test/stack/20-hdfs.yaml b/test/stack/20-hdfs.yaml
new file mode 100644
index 0000000..847a88e
--- /dev/null
+++ b/test/stack/20-hdfs.yaml
@@ -0,0 +1,78 @@
+---
+apiVersion: zookeeper.stackable.tech/v1alpha1
+kind: ZookeeperCluster
+metadata:
+ name: simple-zk
+spec:
+ image:
+ productVersion: 3.8.3
+ servers:
+ roleGroups:
+ default:
+ replicas: 1
+---
+apiVersion: zookeeper.stackable.tech/v1alpha1
+kind: ZookeeperZnode
+metadata:
+ name: simple-hdfs-znode
+spec:
+ clusterRef:
+ name: simple-zk
+---
+apiVersion: hdfs.stackable.tech/v1alpha1
+kind: HdfsCluster
+metadata:
+ name: simple-hdfs
+spec:
+ image:
+ productVersion: 3.3.6
+ custom: hdfs # Will be overwritten by Tilt
+ pullPolicy: IfNotPresent
+ clusterConfig:
+ dfsReplication: 1
+ zookeeperConfigMapName: simple-hdfs-znode
+ authentication:
+ tlsSecretClass: tls # pragma: allowlist secret
+ kerberos:
+ secretClass: kerberos-default # pragma: allowlist secret
+ nameNodes:
+ envOverrides: &envOverrides
+ HADOOP_CLASSPATH: "/stackable/hadoop/share/hadoop/tools/lib/*.jar"
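+      # This picks up the hdfs-utils jar from tools/lib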
+ configOverrides: &configOverrides
+ hdfs-site.xml:
+ dfs.namenode.inode.attributes.provider.class: tech.stackable.hadoop.StackableAuthorizer
+ core-site.xml:
+        # The group mapper is only consulted on the namenodes, but the shared configOverrides anchor applies this to all roles as well
+ hadoop.security.group.mapping: tech.stackable.hadoop.StackableGroupMapper
+ hadoop.security.group.mapping.opa.policy.url: http://opa.default.svc.cluster.local:8081/v1/data/hdfs/groups
+ hadoop.security.authorization.opa.policy.url: http://opa.default.svc.cluster.local:8081/v1/data/hdfs/allow
+ # The operator adds a default static mapping when kerberos is activated, see:
+ # https://github.com/stackabletech/hdfs-operator/blob/main/rust/operator-binary/src/kerberos.rs#L97-L101
+ # This should be removed so that the mapping implementation can provide this information instead:
+ hadoop.user.group.static.mapping.overrides: ""
+ config:
+ logging:
+ containers:
+ hdfs:
+ console:
+ level: DEBUG
+ loggers:
+ ROOT:
+ level: INFO
+ tech.stackable.hadoop:
+ level: DEBUG
+ roleGroups:
+ default:
+ replicas: 2
+ dataNodes:
+ configOverrides: *configOverrides
+ envOverrides: *envOverrides
+ roleGroups:
+ default:
+ replicas: 1
+ journalNodes:
+ configOverrides: *configOverrides
+ envOverrides: *envOverrides
+ roleGroups:
+ default:
+ replicas: 1
diff --git a/test/stack/30-test-hdfs-permissions.yaml b/test/stack/30-test-hdfs-permissions.yaml
new file mode 100644
index 0000000..d2f8e6c
--- /dev/null
+++ b/test/stack/30-test-hdfs-permissions.yaml
@@ -0,0 +1,90 @@
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+ name: test-hdfs-permissions
+spec:
+ template:
+ spec:
+ containers:
+ - name: test-hdfs-permissions
+ image: docker.stackable.tech/stackable/hadoop:3.3.6-stackable0.0.0-dev
+ env:
+ - name: HADOOP_CONF_DIR
+ value: /stackable/conf/hdfs
+ - name: KRB5_CONFIG
+ value: /stackable/kerberos/krb5.conf
+ - name: HADOOP_OPTS
+ value: -Djava.security.krb5.conf=/stackable/kerberos/krb5.conf
+ command:
+ - /bin/bash
+ - -c
+ - |
+ set -ex
+ klist -k /stackable/kerberos/keytab
+
+ log_in () { kdestroy; kinit -kt /stackable/kerberos/keytab $1/test-hdfs-permissions.default.svc.cluster.local; }
+
+ log_in admin
+ bin/hdfs dfs -ls /
+ bin/hdfs dfs -mkdir -p /alice
+ bin/hdfs dfs -mkdir -p /bob
+ bin/hdfs dfs -mkdir -p /developers
+ bin/hdfs dfs -mkdir -p /developers-ro
+ bin/hdfs dfs -ls -R /
+
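+              # Operations that must be denied are chained with "&& exit 1": under "set -e" a
+              # failing command on the left of "&&" does not abort the script, while an
+              # unexpected success runs "exit 1" and fails the Job.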
+ log_in alice
+ bin/hdfs dfs -ls / && exit 1
+ bin/hdfs dfs -ls /alice
+ bin/hdfs dfs -ls /bob && exit 1
+ bin/hdfs dfs -ls /developers
+ bin/hdfs dfs -ls /developers-ro
+
+ bin/hdfs dfs -put -f /etc/hosts /alice/
+ bin/hdfs dfs -put -f /etc/hosts /bob/ && exit 1
+ bin/hdfs dfs -put -f /etc/hosts /developers/
+ bin/hdfs dfs -put -f /etc/hosts /developers-ro/ && exit 1
+
+ log_in bob
+ bin/hdfs dfs -ls / && exit 1
+ bin/hdfs dfs -ls /alice && exit 1
+ bin/hdfs dfs -ls /bob
+ bin/hdfs dfs -ls /developers
+ bin/hdfs dfs -ls /developers-ro && exit 1
+
+ sleep infinity
+
+ bin/hdfs dfs -ls /
+ bin/hdfs dfs -rm -f /hosts
+ bin/hdfs dfs -put -f /etc/hosts /hosts
+ bin/hdfs dfs -ls /
+ bin/hdfs dfs -cat /hosts
+ volumeMounts:
+ - name: hdfs-config
+ mountPath: /stackable/conf/hdfs
+ - name: kerberos
+ mountPath: /stackable/kerberos
+ volumes:
+ - name: hdfs-config
+ configMap:
+ name: simple-hdfs
+ - name: kerberos
+ ephemeral:
+ volumeClaimTemplate:
+ metadata:
+ annotations:
+ secrets.stackable.tech/class: kerberos-default
+ secrets.stackable.tech/scope: service=test-hdfs-permissions
+ secrets.stackable.tech/kerberos.service.names: admin,alice,bob
+ spec:
+ storageClassName: secrets.stackable.tech
+ accessModes:
+ - ReadWriteOnce
+ resources:
+ requests:
+ storage: "1"
+ securityContext:
+ fsGroup: 1000
+ runAsGroup: 1000
+ runAsUser: 1000
+ restartPolicy: OnFailure