From db654f37c8732419e2030cb0ca84b729bb9681c5 Mon Sep 17 00:00:00 2001 From: Mayoor Rao Date: Thu, 7 Nov 2024 10:30:48 -0800 Subject: [PATCH 1/3] oracle connection from secret --- ads/oracledb/oracle_db.py | 54 ++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 21 deletions(-) diff --git a/ads/oracledb/oracle_db.py b/ads/oracledb/oracle_db.py index 117d69259..e2b660288 100644 --- a/ads/oracledb/oracle_db.py +++ b/ads/oracledb/oracle_db.py @@ -1,7 +1,6 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*-- -# Copyright (c) 2021, 2023 Oracle and/or its affiliates. +# Copyright (c) 2021, 2024 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ """ @@ -17,19 +16,20 @@ Note: We need to account for cx_Oracle though oracledb can operate in thick mode. The end user may be is using one of the old conda packs or an environment where cx_Oracle is the only available driver. """ -from ads.common.utils import ORACLE_DEFAULT_PORT - import logging -import numpy as np import os -import pandas as pd import tempfile -from time import time -from typing import Dict, Optional, List, Union, Iterator import zipfile +from time import time +from typing import Dict, Iterator, List, Optional, Union + +import numpy as np +import pandas as pd + from ads.common.decorator.runtime_dependency import ( OptionalDependency, ) +from ads.common.utils import ORACLE_DEFAULT_PORT logger = logging.getLogger("ads.oracle_connector") CX_ORACLE = "cx_Oracle" @@ -40,17 +40,17 @@ import oracledb as oracle_driver # Both the driver share same signature for the APIs that we are using. PYTHON_DRIVER_NAME = PYTHON_ORACLEDB -except: +except ModuleNotFoundError as err: logger.info("oracledb package not found. Trying to load cx_Oracle") try: import cx_Oracle as oracle_driver PYTHON_DRIVER_NAME = CX_ORACLE - except ModuleNotFoundError: - raise ModuleNotFoundError( + except ModuleNotFoundError as err2: + raise ModuleNotFoundError ( f"Neither `oracledb` nor `cx_Oracle` module was not found. Please run " f"`pip install {OptionalDependency.DATA}`." - ) + ) from err2 class OracleRDBMSConnection(oracle_driver.Connection): @@ -75,7 +75,7 @@ def __init__( logger.info( "Running oracledb driver in thick mode. For mTLS based connection, thick mode is default." ) - except: + except Exception as err: logger.info( "Could not use thick mode. The driver is running in thin mode. System might prompt for passphrase" ) @@ -154,7 +154,6 @@ def insert( batch_size=100000, encoding="utf-8", ): - if if_exists not in ["fail", "replace", "append"]: raise ValueError( f"Unknown option `if_exists`={if_exists}. Valid options are 'fail', 'replace', 'append'" @@ -173,7 +172,6 @@ def insert( df_orcl.columns = df_orcl.columns.str.replace(r"\W+", "_", regex=True) table_exist = True with self.cursor() as cursor: - if if_exists != "replace": try: cursor.execute(f"SELECT 1 from {table_name} FETCH NEXT 1 ROWS ONLY") @@ -275,7 +273,6 @@ def chunks(lst: List, batch_size: int): yield lst[i : i + batch_size] for batch in chunks(record_data, batch_size=batch_size): - cursor.executemany(sql, batch, batcherrors=True) for error in cursor.getbatcherrors(): @@ -304,7 +301,6 @@ def _fetch_by_batch(self, cursor, chunksize): def query( self, sql: str, bind_variables: Optional[Dict], chunksize=None ) -> Union[pd.DataFrame, Iterator[pd.DataFrame]]: - start_time = time() cursor = self.cursor() @@ -315,10 +311,8 @@ def query( cursor.execute(sql, **bind_variables) columns = [row[0] for row in cursor.description] df = iter( - ( - pd.DataFrame(data=rows, columns=columns) - for rows in self._fetch_by_batch(cursor, chunksize) - ) + pd.DataFrame(data=rows, columns=columns) + for rows in self._fetch_by_batch(cursor, chunksize) ) else: @@ -332,3 +326,21 @@ def query( ) return df + + +def get_adw_connection(vault_secret_id: str) -> "oracledb.Connection": + """Creates ADW connection from the credentials stored in the vault""" + import oracledb + + from ads.secrets.adb import ADBSecretKeeper + + secret = vault_secret_id + + logging.getLogger().debug(f"The secret id is: {secret}") + creds = ADBSecretKeeper.load_secret(secret).to_dict() + user = creds.pop("user_name", None) + password = creds.pop("password", None) + if not user or not password: + raise ValueError(f"The user or password is missing in {secret}") + logging.getLogger().debug(f"Downloaded secrets from: {secret}") + return oracledb.connect(user=user, password=password, **creds) From 31d5913c9382f4b97fc32aacc93ffb3443b069e8 Mon Sep 17 00:00:00 2001 From: Mayoor Rao Date: Thu, 7 Nov 2024 10:33:36 -0800 Subject: [PATCH 2/3] Revert "oracle connection from secret" This reverts commit 3082241384078df92162ca5d815c37a015c76b08. --- ads/oracledb/oracle_db.py | 54 +++++++++++++++------------------------ 1 file changed, 21 insertions(+), 33 deletions(-) diff --git a/ads/oracledb/oracle_db.py b/ads/oracledb/oracle_db.py index e2b660288..117d69259 100644 --- a/ads/oracledb/oracle_db.py +++ b/ads/oracledb/oracle_db.py @@ -1,6 +1,7 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*-- -# Copyright (c) 2021, 2024 Oracle and/or its affiliates. +# Copyright (c) 2021, 2023 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ """ @@ -16,20 +17,19 @@ Note: We need to account for cx_Oracle though oracledb can operate in thick mode. The end user may be is using one of the old conda packs or an environment where cx_Oracle is the only available driver. """ +from ads.common.utils import ORACLE_DEFAULT_PORT + import logging +import numpy as np import os +import pandas as pd import tempfile -import zipfile from time import time -from typing import Dict, Iterator, List, Optional, Union - -import numpy as np -import pandas as pd - +from typing import Dict, Optional, List, Union, Iterator +import zipfile from ads.common.decorator.runtime_dependency import ( OptionalDependency, ) -from ads.common.utils import ORACLE_DEFAULT_PORT logger = logging.getLogger("ads.oracle_connector") CX_ORACLE = "cx_Oracle" @@ -40,17 +40,17 @@ import oracledb as oracle_driver # Both the driver share same signature for the APIs that we are using. PYTHON_DRIVER_NAME = PYTHON_ORACLEDB -except ModuleNotFoundError as err: +except: logger.info("oracledb package not found. Trying to load cx_Oracle") try: import cx_Oracle as oracle_driver PYTHON_DRIVER_NAME = CX_ORACLE - except ModuleNotFoundError as err2: - raise ModuleNotFoundError ( + except ModuleNotFoundError: + raise ModuleNotFoundError( f"Neither `oracledb` nor `cx_Oracle` module was not found. Please run " f"`pip install {OptionalDependency.DATA}`." - ) from err2 + ) class OracleRDBMSConnection(oracle_driver.Connection): @@ -75,7 +75,7 @@ def __init__( logger.info( "Running oracledb driver in thick mode. For mTLS based connection, thick mode is default." ) - except Exception as err: + except: logger.info( "Could not use thick mode. The driver is running in thin mode. System might prompt for passphrase" ) @@ -154,6 +154,7 @@ def insert( batch_size=100000, encoding="utf-8", ): + if if_exists not in ["fail", "replace", "append"]: raise ValueError( f"Unknown option `if_exists`={if_exists}. Valid options are 'fail', 'replace', 'append'" @@ -172,6 +173,7 @@ def insert( df_orcl.columns = df_orcl.columns.str.replace(r"\W+", "_", regex=True) table_exist = True with self.cursor() as cursor: + if if_exists != "replace": try: cursor.execute(f"SELECT 1 from {table_name} FETCH NEXT 1 ROWS ONLY") @@ -273,6 +275,7 @@ def chunks(lst: List, batch_size: int): yield lst[i : i + batch_size] for batch in chunks(record_data, batch_size=batch_size): + cursor.executemany(sql, batch, batcherrors=True) for error in cursor.getbatcherrors(): @@ -301,6 +304,7 @@ def _fetch_by_batch(self, cursor, chunksize): def query( self, sql: str, bind_variables: Optional[Dict], chunksize=None ) -> Union[pd.DataFrame, Iterator[pd.DataFrame]]: + start_time = time() cursor = self.cursor() @@ -311,8 +315,10 @@ def query( cursor.execute(sql, **bind_variables) columns = [row[0] for row in cursor.description] df = iter( - pd.DataFrame(data=rows, columns=columns) - for rows in self._fetch_by_batch(cursor, chunksize) + ( + pd.DataFrame(data=rows, columns=columns) + for rows in self._fetch_by_batch(cursor, chunksize) + ) ) else: @@ -326,21 +332,3 @@ def query( ) return df - - -def get_adw_connection(vault_secret_id: str) -> "oracledb.Connection": - """Creates ADW connection from the credentials stored in the vault""" - import oracledb - - from ads.secrets.adb import ADBSecretKeeper - - secret = vault_secret_id - - logging.getLogger().debug(f"The secret id is: {secret}") - creds = ADBSecretKeeper.load_secret(secret).to_dict() - user = creds.pop("user_name", None) - password = creds.pop("password", None) - if not user or not password: - raise ValueError(f"The user or password is missing in {secret}") - logging.getLogger().debug(f"Downloaded secrets from: {secret}") - return oracledb.connect(user=user, password=password, **creds) From e5f10afb927d8c17f52fa8007c1e6507183a254a Mon Sep 17 00:00:00 2001 From: Mayoor Rao Date: Thu, 7 Nov 2024 13:47:45 -0800 Subject: [PATCH 3/3] oracle connection from secret --- ads/oracledb/oracle_db.py | 52 ++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/ads/oracledb/oracle_db.py b/ads/oracledb/oracle_db.py index 117d69259..f76278d1c 100644 --- a/ads/oracledb/oracle_db.py +++ b/ads/oracledb/oracle_db.py @@ -1,7 +1,6 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*-- -# Copyright (c) 2021, 2023 Oracle and/or its affiliates. +# Copyright (c) 2021, 2024 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ """ @@ -17,19 +16,20 @@ Note: We need to account for cx_Oracle though oracledb can operate in thick mode. The end user may be is using one of the old conda packs or an environment where cx_Oracle is the only available driver. """ -from ads.common.utils import ORACLE_DEFAULT_PORT - import logging -import numpy as np import os -import pandas as pd import tempfile -from time import time -from typing import Dict, Optional, List, Union, Iterator import zipfile +from time import time +from typing import Dict, Iterator, List, Optional, Union + +import numpy as np +import pandas as pd + from ads.common.decorator.runtime_dependency import ( OptionalDependency, ) +from ads.common.utils import ORACLE_DEFAULT_PORT logger = logging.getLogger("ads.oracle_connector") CX_ORACLE = "cx_Oracle" @@ -40,17 +40,17 @@ import oracledb as oracle_driver # Both the driver share same signature for the APIs that we are using. PYTHON_DRIVER_NAME = PYTHON_ORACLEDB -except: +except ModuleNotFoundError: logger.info("oracledb package not found. Trying to load cx_Oracle") try: import cx_Oracle as oracle_driver PYTHON_DRIVER_NAME = CX_ORACLE - except ModuleNotFoundError: + except ModuleNotFoundError as err2: raise ModuleNotFoundError( f"Neither `oracledb` nor `cx_Oracle` module was not found. Please run " f"`pip install {OptionalDependency.DATA}`." - ) + ) from err2 class OracleRDBMSConnection(oracle_driver.Connection): @@ -75,7 +75,7 @@ def __init__( logger.info( "Running oracledb driver in thick mode. For mTLS based connection, thick mode is default." ) - except: + except Exception: logger.info( "Could not use thick mode. The driver is running in thin mode. System might prompt for passphrase" ) @@ -154,7 +154,6 @@ def insert( batch_size=100000, encoding="utf-8", ): - if if_exists not in ["fail", "replace", "append"]: raise ValueError( f"Unknown option `if_exists`={if_exists}. Valid options are 'fail', 'replace', 'append'" @@ -173,7 +172,6 @@ def insert( df_orcl.columns = df_orcl.columns.str.replace(r"\W+", "_", regex=True) table_exist = True with self.cursor() as cursor: - if if_exists != "replace": try: cursor.execute(f"SELECT 1 from {table_name} FETCH NEXT 1 ROWS ONLY") @@ -275,7 +273,6 @@ def chunks(lst: List, batch_size: int): yield lst[i : i + batch_size] for batch in chunks(record_data, batch_size=batch_size): - cursor.executemany(sql, batch, batcherrors=True) for error in cursor.getbatcherrors(): @@ -304,7 +301,6 @@ def _fetch_by_batch(self, cursor, chunksize): def query( self, sql: str, bind_variables: Optional[Dict], chunksize=None ) -> Union[pd.DataFrame, Iterator[pd.DataFrame]]: - start_time = time() cursor = self.cursor() @@ -315,10 +311,8 @@ def query( cursor.execute(sql, **bind_variables) columns = [row[0] for row in cursor.description] df = iter( - ( - pd.DataFrame(data=rows, columns=columns) - for rows in self._fetch_by_batch(cursor, chunksize) - ) + pd.DataFrame(data=rows, columns=columns) + for rows in self._fetch_by_batch(cursor, chunksize) ) else: @@ -332,3 +326,21 @@ def query( ) return df + + +def get_adw_connection(vault_secret_id: str) -> "oracledb.Connection": + """Creates ADW connection from the credentials stored in the vault""" + import oracledb + + from ads.secrets.adb import ADBSecretKeeper + + secret = vault_secret_id + + logging.getLogger().debug("A secret id was used to retrieve credentials.") + creds = ADBSecretKeeper.load_secret(secret).to_dict() + user = creds.pop("user_name", None) + password = creds.pop("password", None) + if not user or not password: + raise ValueError(f"The user or password is missing in {secret}") + logging.getLogger().debug("Downloaded secrets successfully.") + return oracledb.connect(user=user, password=password, **creds) \ No newline at end of file