Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,13 @@ import {
TARGET_PLATFORM,
TARGET_PLATFORM_INSTANCE,
} from '@app/ingest/source/builder/RecipeForm/dbt_cloud';
import {
DORIS,
DORIS_DATABASE,
DORIS_HOST_PORT,
DORIS_PASSWORD,
DORIS_USERNAME,
} from '@app/ingest/source/builder/RecipeForm/doris';
import {
HIVE_DATABASE,
HIVE_HOST_PORT,
Expand Down Expand Up @@ -499,6 +506,18 @@ export const RECIPE_FIELDS: RecipeFields = {
],
filterSectionTooltip: 'Include or exclude specific Schemas, Tables and Views from ingestion.',
},
// Apache Doris: connection fields, allow/deny filter fields and advanced toggles for the recipe form.
[DORIS]: {
fields: [DORIS_HOST_PORT, DORIS_USERNAME, DORIS_PASSWORD, DORIS_DATABASE],
filterFields: [SCHEMA_ALLOW, SCHEMA_DENY, TABLE_ALLOW, TABLE_DENY, VIEW_ALLOW, VIEW_DENY],
advancedFields: [
INCLUDE_TABLES,
INCLUDE_VIEWS,
TABLE_PROFILING_ENABLED,
COLUMN_PROFILING_ENABLED,
STATEFUL_INGESTION_ENABLED,
],
filterSectionTooltip: 'Include or exclude specific Schemas, Tables and Views from ingestion.',
},
[DATABRICKS]: {
fields: [WORKSPACE_URL, TOKEN],
filterFields: [
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import { FieldType, RecipeField } from '@app/ingest/source/builder/RecipeForm/common';

/** Source type identifier for Apache Doris; used as the key of the Doris entry in RECIPE_FIELDS. */
export const DORIS = 'doris';

/**
 * Required text field for the Apache Doris host and port.
 * Bound to the recipe path `source.config.host_port`.
 */
export const DORIS_HOST_PORT: RecipeField = {
name: 'host_port',
label: 'Host and Port',
tooltip:
"The host and port where Apache Doris is running. For example, 'doris-server:9030'. Note: this host must be accessible on the network where DataHub is running (or allowed via an IP Allow List, AWS PrivateLink, etc).",
type: FieldType.TEXT,
fieldPath: 'source.config.host_port',
placeholder: 'doris-server:9030',
required: true,
rules: null,
};

/**
 * Text field for the Apache Doris database to ingest.
 * Bound to the recipe path `source.config.database`.
 *
 * NOTE(review): marked `required`, while the tooltip reads like an optional narrowing
 * filter — confirm the Doris source config actually requires `database`.
 */
export const DORIS_DATABASE: RecipeField = {
name: 'database',
label: 'Database',
tooltip: 'Ingest metadata for a specific Database.',
type: FieldType.TEXT,
fieldPath: 'source.config.database',
placeholder: 'my_db',
required: true,
rules: null,
};

/**
 * Required text field for the Apache Doris username.
 * Bound to the recipe path `source.config.username`.
 */
export const DORIS_USERNAME: RecipeField = {
name: 'username',
label: 'Username',
tooltip: 'The Apache Doris username used to extract metadata.',
type: FieldType.TEXT,
fieldPath: 'source.config.username',
placeholder: 'root',
required: true,
rules: null,
};

/**
 * Required field for the Apache Doris password.
 * Bound to the recipe path `source.config.password`.
 * Uses FieldType.SECRET instead of TEXT — presumably so the value is handled as a
 * stored secret rather than plain text; verify against FieldType.
 */
export const DORIS_PASSWORD: RecipeField = {
name: 'password',
label: 'Password',
tooltip: 'The Apache Doris password for the user.',
type: FieldType.SECRET,
fieldPath: 'source.config.password',
placeholder: 'password',
required: true,
rules: null,
};
8 changes: 8 additions & 0 deletions datahub-web-react/src/app/ingest/source/builder/sources.json
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,14 @@
"docsUrl": "https://docs.datahub.com/docs/generated/ingestion/sources/mariadb/",
"recipe": "source:\n type: mariadb\n config:\n # Coordinates\n host_port: null\n # The name\n database: null\n # Credentials\n username: null\n include_views: true\n include_tables: true\n profiling:\n enabled: true\n profile_table_level_only: true\n stateful_ingestion:\n enabled: true"
},
{
"urn": "urn:li:dataPlatform:doris",
"name": "doris",
"displayName": "Apache Doris",
"description": "Import Tables, Views, Databases, Schemas, and statistics from Apache Doris.",
"docsUrl": "https://docs.datahub.com/docs/generated/ingestion/sources/doris/",
"recipe": "source:\n type: doris\n config:\n # Coordinates\n host_port: null\n # The name\n database: null\n # Credentials\n username: null\n include_views: true\n include_tables: true\n profiling:\n enabled: true\n profile_table_level_only: true\n stateful_ingestion:\n enabled: true"
},
{
"urn": "urn:li:dataPlatform:mongodb",
"name": "mongodb",
Expand Down
35 changes: 35 additions & 0 deletions datahub-web-react/src/app/ingest/source/conf/doris/doris.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import { SourceConfig } from '@app/ingest/source/conf/types';

import dorisLogo from '@images/dorislogo.png';

/**
 * Starter YAML recipe for the Apache Doris source.
 *
 * `\${...}` is escaped so the literal `${DORIS_USERNAME}` / `${DORIS_PASSWORD}` text
 * ends up in the recipe instead of being interpolated by the template literal.
 */
const placeholderRecipe = `\
source:
    type: doris
    config:
        # Coordinates
        host_port: # Your Apache Doris host and port, e.g. doris:9030
        database: # Your Apache Doris database name, e.g. datahub

        # Credentials
        # Add secret in Secrets Tab with relevant names for each variable
        username: "\${DORIS_USERNAME}" # Your Apache Doris username, e.g. root
        password: "\${DORIS_PASSWORD}" # Your Apache Doris password, e.g. password_01

        # Options
        include_tables: True
        include_views: True

        # Profiling
        profiling:
            enabled: false
`;

/**
 * Source configuration for Apache Doris: type id, starter recipe, display name,
 * docs link and logo. Uses the Doris logo asset rather than the MySQL one.
 */
const dorisConfig: SourceConfig = {
    type: 'doris',
    placeholderRecipe,
    displayName: 'Apache Doris',
    docsUrl: 'https://docs.datahub.com/docs/generated/ingestion/sources/doris/',
    logoUrl: dorisLogo,
};

export default dorisConfig;
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,13 @@ import {
TARGET_PLATFORM,
TARGET_PLATFORM_INSTANCE,
} from '@app/ingestV2/source/builder/RecipeForm/dbt_cloud';
import {
DORIS,
DORIS_DATABASE,
DORIS_HOST_PORT,
DORIS_PASSWORD,
DORIS_USERNAME,
} from '@app/ingestV2/source/builder/RecipeForm/doris';
import {
HIVE_DATABASE,
HIVE_HOST_PORT,
Expand Down Expand Up @@ -473,6 +480,18 @@ export const RECIPE_FIELDS: RecipeFields = {
],
filterSectionTooltip: 'Include or exclude specific Schemas, Tables and Views from ingestion.',
},
// Apache Doris: connection fields, allow/deny filter fields and advanced toggles for the recipe form.
[DORIS]: {
fields: [DORIS_HOST_PORT, DORIS_USERNAME, DORIS_PASSWORD, DORIS_DATABASE],
filterFields: [SCHEMA_ALLOW, SCHEMA_DENY, TABLE_ALLOW, TABLE_DENY, VIEW_ALLOW, VIEW_DENY],
advancedFields: [
INCLUDE_TABLES,
INCLUDE_VIEWS,
TABLE_PROFILING_ENABLED,
COLUMN_PROFILING_ENABLED,
STATEFUL_INGESTION_ENABLED,
],
filterSectionTooltip: 'Include or exclude specific Schemas, Tables and Views from ingestion.',
},
[DATABRICKS]: {
fields: [WORKSPACE_URL, TOKEN],
filterFields: [
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import { FieldType, RecipeField } from '@app/ingestV2/source/builder/RecipeForm/common';

/** Source type identifier for Apache Doris; used as the key of the Doris entry in RECIPE_FIELDS. */
export const DORIS = 'doris';

/**
 * Required text field for the Apache Doris host and port.
 * Bound to the recipe path `source.config.host_port`.
 */
export const DORIS_HOST_PORT: RecipeField = {
name: 'host_port',
label: 'Host and Port',
tooltip:
"The host and port where Apache Doris is running. For example, 'doris-server:9030'. Note: this host must be accessible on the network where DataHub is running (or allowed via an IP Allow List, AWS PrivateLink, etc).",
type: FieldType.TEXT,
fieldPath: 'source.config.host_port',
placeholder: 'doris-server:9030',
required: true,
rules: null,
};

/**
 * Text field for the Apache Doris database to ingest.
 * Bound to the recipe path `source.config.database`.
 *
 * NOTE(review): marked `required`, while the tooltip reads like an optional narrowing
 * filter — confirm the Doris source config actually requires `database`.
 */
export const DORIS_DATABASE: RecipeField = {
name: 'database',
label: 'Database',
tooltip: 'Ingest metadata for a specific Database.',
type: FieldType.TEXT,
fieldPath: 'source.config.database',
placeholder: 'my_db',
required: true,
rules: null,
};

/**
 * Required text field for the Apache Doris username.
 * Bound to the recipe path `source.config.username`.
 */
export const DORIS_USERNAME: RecipeField = {
name: 'username',
label: 'Username',
tooltip: 'The Apache Doris username used to extract metadata.',
type: FieldType.TEXT,
fieldPath: 'source.config.username',
placeholder: 'root',
required: true,
rules: null,
};

/**
 * Required field for the Apache Doris password.
 * Bound to the recipe path `source.config.password`.
 * Uses FieldType.SECRET instead of TEXT — presumably so the value is handled as a
 * stored secret rather than plain text; verify against FieldType.
 */
export const DORIS_PASSWORD: RecipeField = {
name: 'password',
label: 'Password',
tooltip: 'The Apache Doris password for the user.',
type: FieldType.SECRET,
fieldPath: 'source.config.password',
placeholder: 'password',
required: true,
rules: null,
};
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,14 @@
"docsUrl": "https://docs.datahub.com/docs/generated/ingestion/sources/mariadb/",
"recipe": "source:\n type: mariadb\n config:\n # Coordinates\n host_port: null\n # The name\n database: null\n # Credentials\n username: null\n include_views: true\n include_tables: true\n profiling:\n enabled: true\n profile_table_level_only: true\n stateful_ingestion:\n enabled: true"
},
{
"urn": "urn:li:dataPlatform:doris",
"name": "doris",
"displayName": "Apache Doris",
"description": "Import Tables, Views, Databases, Schemas, and statistics from Apache Doris.",
"docsUrl": "https://docs.datahub.com/docs/generated/ingestion/sources/doris/",
"recipe": "source:\n type: doris\n config:\n # Coordinates\n host_port: null\n # The name\n database: null\n # Credentials\n username: null\n include_views: true\n include_tables: true\n profiling:\n enabled: true\n profile_table_level_only: true\n stateful_ingestion:\n enabled: true"
},
{
"urn": "urn:li:dataPlatform:mongodb",
"name": "mongodb",
Expand Down
35 changes: 35 additions & 0 deletions datahub-web-react/src/app/ingestV2/source/conf/doris/doris.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import { SourceConfig } from '@app/ingestV2/source/conf/types';

import dorisLogo from '@images/dorislogo.png';

/**
 * Starter YAML recipe for the Apache Doris source.
 *
 * `\${...}` is escaped so the literal `${DORIS_USERNAME}` / `${DORIS_PASSWORD}` text
 * ends up in the recipe instead of being interpolated by the template literal.
 */
const placeholderRecipe = `\
source:
    type: doris
    config:
        # Coordinates
        host_port: # Your Apache Doris host and port, e.g. doris:9030
        database: # Your Apache Doris database name, e.g. datahub

        # Credentials
        # Add secret in Secrets Tab with relevant names for each variable
        username: "\${DORIS_USERNAME}" # Your Apache Doris username, e.g. root
        password: "\${DORIS_PASSWORD}" # Your Apache Doris password, e.g. password_01

        # Options
        include_tables: True
        include_views: True

        # Profiling
        profiling:
            enabled: false
`;

/**
 * Source configuration for Apache Doris: type id, starter recipe, display name,
 * docs link and logo. Uses the Doris logo asset rather than the MySQL one.
 */
const dorisConfig: SourceConfig = {
    type: 'doris',
    placeholderRecipe,
    displayName: 'Apache Doris',
    docsUrl: 'https://docs.datahub.com/docs/generated/ingestion/sources/doris/',
    logoUrl: dorisLogo,
};

export default dorisConfig;
Binary file added datahub-web-react/src/images/dorislogo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
133 changes: 133 additions & 0 deletions metadata-ingestion/docs/sources/doris/doris_pre.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
### Prerequisites

In order to execute this source, the user credentials need the following privileges:

```sql
-- Grant necessary privileges to the DataHub user
GRANT SELECT_PRIV ON your_database.* TO 'datahub_user'@'%';
GRANT SHOW_VIEW_PRIV ON your_database.* TO 'datahub_user'@'%';

-- Profiling (optional) needs no additional grant: it uses the SELECT_PRIV granted above
```

**Note:** `SELECT_PRIV` is required to read table structures and perform profiling operations. `SHOW_VIEW_PRIV` is required to ingest views.

#### Apache Doris Compatibility Notes

Apache Doris uses the MySQL protocol for client connections, but with some key differences:

**Port Configuration:**

- Default Doris query port: **9030** (FE MySQL protocol port)
- **Not** MySQL's default 3306
- Ensure you use `host_port: doris-server:9030` in your configuration

**Architecture:**

- Doris uses a Frontend (FE) and Backend (BE) architecture
- DataHub connects to the FE node on port 9030
- Ensure the FE node is accessible and healthy

**Data Types:**

- Doris includes additional data types: `HLL`, `BITMAP`, `ARRAY`, `JSONB`, `QUANTILE_STATE`
- These types are automatically mapped to appropriate DataHub types
- No additional configuration needed

**Stored Procedures:**

- Apache Doris does not support stored procedures
- The `information_schema.ROUTINES` table is a MySQL compatibility stub (always empty)
- The connector automatically handles this limitation

### Troubleshooting

#### Connection Issues

**Problem:** `Can't connect to MySQL server` or connection timeouts

**Solutions:**

- Verify you're using port **9030** (FE query port), not 8030 (FE HTTP port), 9050 (BE heartbeat port), or 3306 (MySQL default)
- Check that the Doris FE (Frontend) node is running: `curl http://fe-host:8030/api/bootstrap`
- Ensure network connectivity and firewall rules allow connections to port 9030
- Verify the FE node has registered BE nodes: `SHOW BACKENDS;`

**Problem:** `Access denied for user`

**Solutions:**

- Verify the user has been granted `SELECT_PRIV` and `SHOW_VIEW_PRIV`
- Check grants with: `SHOW GRANTS FOR 'datahub_user'@'%';`
- Ensure the user is allowed to connect from your host: use `'%'` for any host or specify the IP

#### Missing Metadata

**Problem:** Tables or views are not being ingested

**Solutions:**

- Verify the user has `SELECT_PRIV` on the target databases/tables
- Check that tables exist and are visible: `SHOW TABLES IN your_database;`
- Review `schema_pattern` and `table_pattern` in your recipe configuration
- Ensure the database is not filtered out by your configuration

**Problem:** Column types showing as UNKNOWN

**Solutions:**

- This typically happens with Doris-specific types in older DataHub versions
- Ensure you're using the latest DataHub version which includes Doris type mappings
- Check Doris FE logs for any metadata query errors

#### Performance Issues

**Problem:** Ingestion is slow or timing out

**Solutions:**

- Use `schema_pattern` and `table_pattern` to limit scope: `schema_pattern: {"allow": ["important_db"]}`
- Enable table-level-only profiling: `profiling.profile_table_level_only: true`
- Disable profiling if not needed: `profiling.enabled: false`
- Increase query timeouts if you have very large tables: `options.connect_timeout: 300`

**Problem:** Doris FE or BE is overloaded during ingestion

**Solutions:**

- Limit how many columns are profiled per table: `profiling.max_number_of_fields_to_profile: 10`
- Schedule ingestion during off-peak hours
- Set `profiling.query_combiner_enabled: false` to avoid complex combined queries

#### Profiling Issues

**Problem:** Profiling fails or returns no statistics

**Solutions:**

- Verify user has `SELECT_PRIV` on target tables
- Check that tables contain data (empty tables have no statistics)
- Ensure Doris statistics are up to date: `ANALYZE TABLE your_table;`
- Review Doris FE logs for query errors during profiling

#### Doris-Specific Issues

**Problem:** Warnings about `DUPLICATE KEY` or `DISTRIBUTED BY HASH`

**Solutions:**

- These are informational warnings from SQLAlchemy parsing Doris-specific table properties
- They do not affect ingestion and can be safely ignored
- The connector handles these properties correctly

**Problem:** View lineage not being captured

**Solutions:**

- Ensure `include_view_lineage: true` (enabled by default)
- Verify views are created with proper table references
- Check that referenced tables are accessible to the DataHub user
- Review `include_view_column_lineage` configuration

For additional support, consult the [Apache Doris documentation](https://doris.apache.org/docs) or reach out to the DataHub community.
Loading
Loading