Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 13 additions & 12 deletions tap_postgres/stream_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,27 +72,28 @@ def refresh_streams_schema(conn_config: Dict, streams: List[Dict]):
for stream in discover_db(conn, conn_config.get('filter_schemas'), [st['table_name'] for st in streams])
}

for idx, stream in enumerate(streams):
streams[idx]['schema'] = copy.deepcopy(new_discovery[stream['tap_stream_id']]['schema'])

LOGGER.debug('New discovery schemas %s', new_discovery)

# For every stream dictionary, update the schema and metadata from the new discovery
for idx, stream in enumerate(streams):
# update schema
streams[idx]['schema'] = copy.deepcopy(new_discovery[stream['tap_stream_id']]['schema'])

# Update metadata
#
# 1st step: new discovery doesn't contain non-discoverable metadata: e.g replication method & key, selected
# so let's copy those from the original stream object
md_map = metadata.to_map(stream['metadata'])
meta = md_map.get(())
original_metadata_map = metadata.to_map(stream['metadata'])
new_metadata_map = metadata.to_map(new_discovery[stream['tap_stream_id']]['metadata'])

for metadata_key in new_metadata_map:
if metadata_key in original_metadata_map:
original_metadata_map[metadata_key].update(new_metadata_map[metadata_key])
else:
original_metadata_map[metadata_key] = new_metadata_map[metadata_key]

for idx_met, metadatum in enumerate(new_discovery[stream['tap_stream_id']]['metadata']):
if not metadatum['breadcrumb']:
meta.update(new_discovery[stream['tap_stream_id']]['metadata'][idx_met]['metadata'])
new_discovery[stream['tap_stream_id']]['metadata'][idx_met]['metadata'] = meta
updated_metadata = metadata.to_list(original_metadata_map)

# 2nd step: now copy all the metadata from the updated new discovery to the original stream
streams[idx]['metadata'] = copy.deepcopy(new_discovery[stream['tap_stream_id']]['metadata'])
Copy link
Author

@khoaanguyenn khoaanguyenn Jan 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. The original code did not respect the metadata in the catalog provided by the Meltano SDK: every time it ran, that metadata was overwritten by the newly discovered stream from the Postgres catalog. The fix above preserves the original `selected` field on each stream, which is required to select the corresponding columns as configured in the Meltano YAML file.

streams[idx]['metadata'] = copy.deepcopy(updated_metadata)

LOGGER.debug('Updated streams schemas %s', streams)

Expand Down
9 changes: 8 additions & 1 deletion tests/integration/test_streams_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@ def test_refresh_streams_schema(self):
'table-key-properties': ['some_id'],
'row-count': 1000,
}
},
{
'breadcrumb': ['properties', 'char_name'],
'metadata': {
'arbitrary_field_metadata': 'should be preserved'
}
}
]
}
Expand Down Expand Up @@ -86,7 +92,8 @@ def test_refresh_streams_schema(self):
'selected-by-default': True},
('properties', 'char_name'): {'selected-by-default': True,
'inclusion': 'available',
'sql-datatype': 'character'}})
'sql-datatype': 'character',
'arbitrary_field_metadata': 'should be preserved'}})

self.assertEqual({'properties': {'id': {'type': ['integer'],
'maximum': 2147483647,
Expand Down