# This is the main configuration file for the application.
# ~~~~~

# Secret key
# ~~~~~
# The secret key is used to secure cryptographic functions.
#
# This must be changed for production, but we recommend not changing it in this file.
#
# See http://www.playframework.com/documentation/latest/ApplicationSecret for more details.
play.http.secret.key = ${DATAHUB_SECRET}

# The application languages
# ~~~~~
play.i18n.langs = ["en"]

play.application.loader = play.inject.guice.GuiceApplicationLoader

# Play http parser settings
#
# Increase the default buffer size to handle large POST requests.
play.http.parser.maxMemoryBuffer = 10MB
play.http.parser.maxMemoryBuffer = ${?DATAHUB_PLAY_MEM_BUFFER_SIZE}

# TODO: Disable legacy URL encoding eventually
play.modules.disabled += "play.api.mvc.CookiesModule"
play.modules.enabled += "play.api.mvc.LegacyCookiesModule"
play.modules.enabled += "auth.AuthModule"

# We override the Akka server provider to allow setting the max header count to a higher value.
# This is useful when using proxies like Envoy, which can otherwise cause the frontend server to
# reject GMS responses carrying more than the default maximum of 64 headers.
play.server.provider = server.CustomAkkaHttpServerProvider
play.http.server.akka.max-header-count = 64
play.http.server.akka.max-header-count = ${?DATAHUB_AKKA_MAX_HEADER_COUNT}
play.server.akka.max-header-size = 8k
play.server.akka.max-header-size = ${?DATAHUB_AKKA_MAX_HEADER_VALUE_LENGTH}

# Update AUTH_COOKIE_SAME_SITE and AUTH_COOKIE_SECURE in order to change how authentication cookies
# are configured. If you wish cookies to be sent in first- and third-party contexts, set
# AUTH_COOKIE_SAME_SITE = "NONE" and AUTH_COOKIE_SECURE = true. If AUTH_COOKIE_SAME_SITE is "NONE",
# AUTH_COOKIE_SECURE must be set to true.
play.http.session.sameSite = "LAX"
play.http.session.sameSite = ${?AUTH_COOKIE_SAME_SITE}
play.http.session.secure = false
play.http.session.secure = ${?AUTH_COOKIE_SECURE}
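
# Example (commented out): the combination described above that allows authentication cookies
# to be sent in both first- and third-party contexts.
# play.http.session.sameSite = "NONE"
# play.http.session.secure = true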

play.filters {
  enabled = [
    play.filters.gzip.GzipFilter
  ]
  gzip {
    contentType {
      # If non-empty, a response will only be compressed if its content type is in this list.
      whiteList = ["text/*", "application/javascript", "application/json"]
      # The black list is only used if the white list is empty.
      # Compress all responses except the ones whose content type is in this list.
      blackList = []
    }
  }
}

# pac4j configuration
# Defaults to PlayCookieSessionStore to keep datahub-frontend stateless.
pac4j.sessionStore.provider = "PlayCookieSessionStore"
pac4j.sessionStore.provider = ${?PAC4J_SESSIONSTORE_PROVIDER}

# Database configuration
# ~~~~~
# You can declare as many datasources as you want.
# By convention, the default datasource is named `default`
#
# db.default.driver=org.h2.Driver
# db.default.url="jdbc:h2:mem:play"
# db.default.username=sa
# db.default.password=""

# Evolutions
# ~~~~~
# You can disable evolutions if needed
# play.evolutions.enabled=false

# You can disable evolutions for a specific datasource if necessary
# play.evolutions.db.default.enabled=false

app.version = 0.0.0
app.version = ${?DATAHUB_APP_VERSION}

dataset.hdfs_browser.link = ""

linkedin.internal = false
linkedin.show.dataset.lineage = true
linkedin.suggestion.confidence.threshold = "50"

tracking.piwik.siteid = "0"
tracking.piwik.siteid = ${?DATAHUB_PIWIK_SITEID}
tracking.piwik.url = ""
tracking.piwik.url = ${?DATAHUB_PIWIK_URL}

linkedin.links.avi.urlPrimary = ""
linkedin.links.avi.urlFallback = ""

links.wiki.appHelp = "https://github.com/datahub-project/datahub"
links.wiki.gdprPii = ""
links.wiki.tmsSchema = ""
links.wiki.gdprTaxonomy = ""
links.wiki.staleSearchIndex = ""
links.wiki.dht = ""
links.wiki.purgePolicies = ""
links.wiki.jitAcl = ""
links.wiki.metadataCustomRegex = ""
links.wiki.complianceOwner = ""
links.wiki.exportPolicy = ""
links.wiki.metadataHealth = ""
links.wiki.purgeKey = ""
links.wiki.datasetDecommission = ""

ui.show.ownership.revamp = true
ui.show.staging.banner = false
ui.show.people = true
ui.show.CM.banner = false
ui.show.CM.link = ""
ui.show.stale.search = true
ui.show.live.data.banner = false
ui.show.lineage.graph = true
ui.show.advanced.search = true
ui.show.institutional.memory = true
ui.new.browse.dataset = true

# React App Authentication
# ~~~~~
# React currently supports OIDC SSO + self-configured JAAS for authentication. Below you can find
# the supported configurations for each mechanism.
#
# Required OIDC Configuration Values:
#
auth.oidc.enabled = ${?AUTH_OIDC_ENABLED} # Enable OIDC authentication, disabled by default
auth.oidc.clientId = ${?AUTH_OIDC_CLIENT_ID} # Unique client id issued by the identity provider
auth.oidc.clientSecret = ${?AUTH_OIDC_CLIENT_SECRET} # Unique client secret issued by the identity provider
auth.oidc.discoveryUri = ${?AUTH_OIDC_DISCOVERY_URI} # The IdP OIDC discovery url
auth.baseUrl = ${?AUTH_OIDC_BASE_URL} # The base URL associated with your DataHub deployment
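
# Example (commented out; hypothetical client id, secret, and IdP domain): a minimal OIDC setup
# for a local deployment, using the required values above. The discovery URI follows the standard
# OIDC well-known path; substitute your identity provider's domain.
# auth.oidc.enabled = true
# auth.oidc.clientId = "myclientid"
# auth.oidc.clientSecret = "myclientsecret"
# auth.oidc.discoveryUri = "https://your-idp.example.com/.well-known/openid-configuration"
# auth.baseUrl = "http://localhost:9002"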
#
# Optional OIDC Configuration Values:
#
auth.oidc.userNameClaim = ${?AUTH_OIDC_USER_NAME_CLAIM} # The attribute / claim used to derive the DataHub username. Defaults to "preferred_username".
auth.oidc.userNameClaimRegex = ${?AUTH_OIDC_USER_NAME_CLAIM_REGEX} # The regex used to parse the DataHub username from the user name claim. Defaults to (.*) (all)
auth.oidc.scope = ${?AUTH_OIDC_SCOPE} # String representing the requested scope from the IdP. Defaults to "oidc email profile"
auth.oidc.clientAuthenticationMethod = ${?AUTH_OIDC_CLIENT_AUTHENTICATION_METHOD} # Which authentication method to use to pass credentials (clientId and clientSecret) to the token endpoint. Defaults to "client_secret_basic"
auth.oidc.jitProvisioningEnabled = ${?AUTH_OIDC_JIT_PROVISIONING_ENABLED} # Whether DataHub users should be provisioned on login if they do not exist. Defaults to true.
auth.oidc.preProvisioningRequired = ${?AUTH_OIDC_PRE_PROVISIONING_REQUIRED} # Whether the user should already exist in DataHub on login, failing login if they do not. Defaults to false.
auth.oidc.extractGroupsEnabled = ${?AUTH_OIDC_EXTRACT_GROUPS_ENABLED} # Whether groups should be extracted from a claim in the OIDC profile. Only applies if JIT provisioning is enabled. Groups will be created if they do not exist. Defaults to true.
auth.oidc.groupsClaim = ${?AUTH_OIDC_GROUPS_CLAIM} # The OIDC claim to extract groups information from. Defaults to "groups"
auth.oidc.responseType = ${?AUTH_OIDC_RESPONSE_TYPE}
auth.oidc.responseMode = ${?AUTH_OIDC_RESPONSE_MODE}
auth.oidc.useNonce = ${?AUTH_OIDC_USE_NONCE}
auth.oidc.customParam.resource = ${?AUTH_OIDC_CUSTOM_PARAM_RESOURCE}
auth.oidc.readTimeout = ${?AUTH_OIDC_READ_TIMEOUT}
auth.oidc.extractJwtAccessTokenClaims = ${?AUTH_OIDC_EXTRACT_JWT_ACCESS_TOKEN_CLAIMS} # Whether to extract claims from the JWT access token. Defaults to false.
auth.oidc.preferredJwsAlgorithm = ${?AUTH_OIDC_PREFERRED_JWS_ALGORITHM} # Which JWS algorithm to use
#
# By default, the callback URL that should be registered with the identity provider is computed as ${baseUrl}/callback/oidc.
# For example, the default callback URL for a local deployment of DataHub would be "http://localhost:9002/callback/oidc".
# This callback URL should be registered with the identity provider during configuration.
#
# JAAS Optional Configuration Values
#
# In addition to OIDC, JAAS authentication with a dummy login module is enabled by default. Currently, this accepts
# any username / password combination as valid credentials. To disable this entry point altogether, specify the following config:
#
auth.jaas.enabled = ${?AUTH_JAAS_ENABLED}
auth.native.enabled = ${?AUTH_NATIVE_ENABLED}
#
# To disable all authentication to the app and proxy all users through a master "datahub" account, make sure that
# JAAS, native, and OIDC auth are all disabled:
#
# auth.jaas.enabled = false
# auth.native.enabled = false
# auth.oidc.enabled = false # (or simply omit oidc configurations)

# Login session expiration time
auth.session.ttlInHours = 720
auth.session.ttlInHours = ${?AUTH_SESSION_TTL_HOURS}

analytics.enabled = ${?DATAHUB_ANALYTICS_ENABLED}

# Kafka Producer Configuration
analytics.kafka.bootstrap.server = ${KAFKA_BOOTSTRAP_SERVER}
analytics.tracking.topic = DataHubUsageEvent_v1
analytics.tracking.topic = ${?DATAHUB_TRACKING_TOPIC}
analytics.kafka.delivery.timeout.ms = 30000
analytics.kafka.delivery.timeout.ms = ${?KAFKA_PROPERTIES_DELIVERY_TIMEOUT_MS}

# Kafka Producer SSL Configs. All must be provided to enable SSL.
analytics.kafka.security.protocol = ${?KAFKA_PROPERTIES_SECURITY_PROTOCOL}
analytics.kafka.ssl.key.password = ${?KAFKA_PROPERTIES_SSL_KEY_PASSWORD}
analytics.kafka.ssl.keystore.type = ${?KAFKA_PROPERTIES_SSL_KEYSTORE_TYPE}
analytics.kafka.ssl.keystore.location = ${?KAFKA_PROPERTIES_SSL_KEYSTORE_LOCATION}
analytics.kafka.ssl.keystore.password = ${?KAFKA_PROPERTIES_SSL_KEYSTORE_PASSWORD}
analytics.kafka.ssl.truststore.type = ${?KAFKA_PROPERTIES_SSL_TRUSTSTORE_TYPE}
analytics.kafka.ssl.truststore.location = ${?KAFKA_PROPERTIES_SSL_TRUSTSTORE_LOCATION}
analytics.kafka.ssl.truststore.password = ${?KAFKA_PROPERTIES_SSL_TRUSTSTORE_PASSWORD}
analytics.kafka.ssl.protocol = ${?KAFKA_PROPERTIES_SSL_PROTOCOL}
analytics.kafka.ssl.endpoint.identification.algorithm = ${?KAFKA_PROPERTIES_SSL_ENDPOINT_IDENTIFICATION_ALGORITHM}

# If analytics.kafka.security.protocol is set to "SASL_SSL", both of these need to be set.
analytics.kafka.sasl.mechanism = ${?KAFKA_PROPERTIES_SASL_MECHANISM}
analytics.kafka.sasl.jaas.config = ${?KAFKA_PROPERTIES_SASL_JAAS_CONFIG}
analytics.kafka.sasl.kerberos.service.name = ${?KAFKA_PROPERTIES_SASL_KERBEROS_SERVICE_NAME}
analytics.kafka.sasl.login.callback.handler.class = ${?KAFKA_PROPERTIES_SASL_LOGIN_CALLBACK_HANDLER_CLASS}
analytics.kafka.sasl.client.callback.handler.class = ${?KAFKA_PROPERTIES_SASL_CLIENT_CALLBACK_HANDLER_CLASS}
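
# Example (commented out; hypothetical broker credentials): a SASL_SSL setup using the PLAIN
# mechanism, equivalent to supplying the environment variables above. PlainLoginModule is the
# standard Kafka login module for PLAIN.
# analytics.kafka.security.protocol = "SASL_SSL"
# analytics.kafka.sasl.mechanism = "PLAIN"
# analytics.kafka.sasl.jaas.config = "org.apache.kafka.common.security.plain.PlainLoginModule required username=\"client\" password=\"client-secret\";"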

# Required Elastic Client Configuration
analytics.elastic.host = ${?ELASTIC_CLIENT_HOST}
analytics.elastic.port = ${?ELASTIC_CLIENT_PORT}

# Optional Elastic Client Configurations
analytics.elastic.threadCount = ${?ELASTIC_CLIENT_THREAD_COUNT}
analytics.elastic.connectionRequestTimeout = ${?ELASTIC_CLIENT_CONNECTION_REQUEST_TIMEOUT}

# Elastic Client SSL Configs. Required if analytics.elastic.useSSL is true.
analytics.elastic.useSSL = ${?ELASTIC_CLIENT_USE_SSL}
analytics.elastic.sslContext.sslProtocol = ${?ELASTIC_CLIENT_SSL_PROTOCOL}
analytics.elastic.sslContext.sslSecureRandomImplementation = ${?ELASTIC_CLIENT_SSL_SECURE_RANDOM_IMPLEMENTATION}
analytics.elastic.sslContext.sslTrustStoreFile = ${?ELASTIC_CLIENT_SSL_TRUST_STORE_FILE}
analytics.elastic.sslContext.sslTrustStoreType = ${?ELASTIC_CLIENT_SSL_TRUST_STORE_TYPE}
analytics.elastic.sslContext.sslTrustStorePassword = ${?ELASTIC_CLIENT_SSL_TRUST_STORE_PASSWORD}
analytics.elastic.sslContext.sslKeyStoreFile = ${?ELASTIC_CLIENT_SSL_KEY_STORE_FILE}
analytics.elastic.sslContext.sslKeyStoreType = ${?ELASTIC_CLIENT_SSL_KEY_STORE_TYPE}
analytics.elastic.sslContext.sslKeyStorePassword = ${?ELASTIC_CLIENT_SSL_KEY_STORE_PASSWORD}

# Optional username + password for use with SSL
analytics.elastic.username = ${?ELASTIC_CLIENT_USERNAME}
analytics.elastic.password = ${?ELASTIC_CLIENT_PASSWORD}

analytics.elastic.indexPrefix = ${?ELASTIC_INDEX_PREFIX}

# Metadata Service Configs
metadataService.host = ${?DATAHUB_GMS_HOST}
metadataService.port = ${?DATAHUB_GMS_PORT}
metadataService.useSsl = ${?DATAHUB_GMS_USE_SSL} # Internal SSL is not fully supported yet.

# Set to "true" to enable Metadata Service Authentication. False by default.
metadataService.auth.enabled = ${?METADATA_SERVICE_AUTH_ENABLED}

# Required when metadataService.auth.enabled is "true". Provides a secure identifier for datahub-frontend that the
# Metadata Service trusts for generating Access Tokens on behalf of a particular user.
# This MUST match the configurations of the same name for the Metadata Service.
systemClientId = __datahub_system # Change this to something random.
systemClientSecret = JohnSnowKnowsNothing # Along with this.
systemClientId = ${?DATAHUB_SYSTEM_CLIENT_ID}
systemClientSecret = ${?DATAHUB_SYSTEM_CLIENT_SECRET}

entityClient.retryInterval = 2
entityClient.retryInterval = ${?ENTITY_CLIENT_RETRY_INTERVAL}
entityClient.numRetries = 3
entityClient.numRetries = ${?ENTITY_CLIENT_NUM_RETRIES}
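
# Example (commented out; the secret shown is a placeholder): enabling Metadata Service
# Authentication, per the Metadata Service Configs section above. Generate a random secret and
# mirror both values in the Metadata Service's own configuration.
# metadataService.auth.enabled = true
# systemClientId = __datahub_system
# systemClientSecret = <your-randomly-generated-secret>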