diff --git a/scripts/agent.sql b/scripts/agent.sql index 9b096a4..090b633 100644 --- a/scripts/agent.sql +++ b/scripts/agent.sql @@ -1,20 +1,20 @@ use role accountadmin; SET my_user = CURRENT_USER(); -CREATE OR REPLACE WAREHOUSE sales_intelligence_wh -WITH - WAREHOUSE_SIZE = 'SMALL' - AUTO_SUSPEND = 3600 - AUTO_RESUME = TRUE - INITIALLY_SUSPENDED = FALSE - MIN_CLUSTER_COUNT = 1 - MAX_CLUSTER_COUNT = 1 - SCALING_POLICY = 'STANDARD' -COMMENT = 'Sales intelligence warehouse with 1-hour auto-suspend policy'; -- warehouse creation - - -USE DATABASE SNOWFLAKE_AI_TOOLKIT; -USE SCHEMA DEMO; +--CREATE OR REPLACE WAREHOUSE sales_intelligence_wh +--WITH +-- WAREHOUSE_SIZE = 'SMALL' +-- AUTO_SUSPEND = 3600 +-- AUTO_RESUME = TRUE +-- INITIALLY_SUSPENDED = FALSE +-- MIN_CLUSTER_COUNT = 1 +-- MAX_CLUSTER_COUNT = 1 +-- SCALING_POLICY = 'STANDARD' +--COMMENT = 'Sales intelligence warehouse with 1-hour auto-suspend policy'; -- warehouse creation + + +USE DATABASE COMMON_DB; +USE SCHEMA SNOWFLAKE_AI_TOOLKIT; -- Create tables for sales data CREATE TABLE sales_conversations ( diff --git a/snowflake.yml b/snowflake.yml index dbced7b..50c976e 100644 --- a/snowflake.yml +++ b/snowflake.yml @@ -39,7 +39,14 @@ entities: - src/display_chat.py - src/playground_settings.py - environment.yml - - data/ + - data/fine-tune/ + - data/images/ + - data/rag/contracts/ + - data/rag/repair_manuals/ + - data/samples/emails.csv + - data/samples/solution_center_articles.csv + - data/samples/audio/ + - data/search/ - scripts/ # audio_to_text: diff --git a/src/search.py b/src/search.py index e660233..dee99bd 100644 --- a/src/search.py +++ b/src/search.py @@ -29,7 +29,7 @@ def display_search(session): # st.write(res) - warehouse = config["warehouse"] + warehouse = config["snowflake_connection"]["warehouse"] if create_or_use == "Create": diff --git a/src/utils.py b/src/utils.py index ee36f14..948b4a1 100644 --- a/src/utils.py +++ b/src/utils.py @@ -967,7 +967,7 @@ def run_setup_silently(session: Session, config: dict): create_demo_database_and_stage_if_not_exists(session) create_stages_tables_for_demo(session) snowflake_config = get_snowflake_config() - setup_pdf_text_chunker_demo( + setup_pdf_text_chunker( session, snowflake_config.get("database"), snowflake_config.get("schema") ) create_search_and_rag_for_demo(session) @@ -1508,7 +1508,7 @@ def show_toast_message(message, duration=3, toast_type="info", position="top-rig toast_container.empty() -def setup_pdf_text_chunker(session): +def setup_pdf_text_chunker(session, db, schema): """ Sets up the pdf_text_chunker UDF in the current database and schema. @@ -1520,7 +1520,7 @@ def setup_pdf_text_chunker(session): """ # Check if UDF already exists try: - udf_check_query = "SHOW USER FUNCTIONS LIKE 'pdf_text_chunker'" + udf_check_query = f"SHOW USER FUNCTIONS LIKE 'pdf_text_chunker' IN {db}.{schema}" existing_udfs = session.sql(udf_check_query).collect() if existing_udfs: # st.info("UDF pdf_text_chunker already exists. Skipping creation.") @@ -1530,8 +1530,8 @@ def setup_pdf_text_chunker(session): return # Create UDF if it doesn't exist - create_udf_query = """ - CREATE OR REPLACE FUNCTION pdf_text_chunker(file_url STRING) + create_udf_query = f""" + CREATE OR REPLACE FUNCTION {db}.{schema}.pdf_text_chunker(file_url STRING) RETURNS TABLE (chunk VARCHAR) LANGUAGE PYTHON RUNTIME_VERSION = '3.9' @@ -1576,65 +1576,65 @@ def process(self, file_url: str): except Exception as e: st.error(f"Error creating UDF: {e}") - -def setup_pdf_text_chunker_demo(session, db, schema): - """ - Sets up the pdf_text_chunker UDF in the current database and schema. - - Args: - session: Snowflake session object - - Note: - Creates a Python UDF that can process PDF files and split them into text chunks - """ - - # Create UDF if it doesn't exist - # TODO: Have to make schema dynamic - create_udf_query = """ - CREATE OR REPLACE FUNCTION snowflake_ai_toolkit.public.pdf_text_chunker(file_url STRING) - RETURNS TABLE (chunk VARCHAR) - LANGUAGE PYTHON - RUNTIME_VERSION = '3.9' - HANDLER = 'pdf_text_chunker' - PACKAGES = ('snowflake-snowpark-python', 'PyPDF2', 'langchain') - AS - $$ -import PyPDF2 -import io -import pandas as pd -from snowflake.snowpark.files import SnowflakeFile -from langchain.text_splitter import RecursiveCharacterTextSplitter - -class pdf_text_chunker: - def read_pdf(self, file_url: str) -> str: - with SnowflakeFile.open(file_url, 'rb') as f: - buffer = io.BytesIO(f.readall()) - reader = PyPDF2.PdfReader(buffer) - text = "" - for page in reader.pages: - try: - text += page.extract_text().replace('\\n', ' ').replace('\\0', ' ') - except: - text = "Unable to Extract" - return text - - def process(self, file_url: str): - text = self.read_pdf(file_url) - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=4000, - chunk_overlap=400, - length_function=len - ) - chunks = text_splitter.split_text(text) - df = pd.DataFrame(chunks, columns=['chunk']) - yield from df.itertuples(index=False, name=None) - $$ - """ - try: - session.sql(create_udf_query).collect() - # st.success("UDF pdf_text_chunker created successfully.") - except Exception as e: - print("UDF Already exists!") +#DEPRECATED +#def setup_pdf_text_chunker_demo(session, db, schema): +# """ +# Sets up the pdf_text_chunker UDF in the current database and schema. +# +# Args: +# session: Snowflake session object +# +# Note: +# Creates a Python UDF that can process PDF files and split them into text chunks +# """ +# +# Create UDF if it doesn't exist +# TODO: Have to make schema dynamic +# create_udf_query = """ +# CREATE OR REPLACE FUNCTION snowflake_ai_toolkit.public.pdf_text_chunker(file_url STRING) +# RETURNS TABLE (chunk VARCHAR) +# LANGUAGE PYTHON +# RUNTIME_VERSION = '3.9' +# HANDLER = 'pdf_text_chunker' +# PACKAGES = ('snowflake-snowpark-python', 'PyPDF2', 'langchain') +# AS +# $$ +# import PyPDF2 +# import io +# import pandas as pd +# from snowflake.snowpark.files import SnowflakeFile +# from langchain.text_splitter import RecursiveCharacterTextSplitter + +# class pdf_text_chunker: +# def read_pdf(self, file_url: str) -> str: +# with SnowflakeFile.open(file_url, 'rb') as f: +# buffer = io.BytesIO(f.readall()) +# reader = PyPDF2.PdfReader(buffer) +# text = "" +# for page in reader.pages: +# try: +# text += page.extract_text().replace('\\n', ' ').replace('\\0', ' ') +# except: +# text = "Unable to Extract" +# return text +# +# def process(self, file_url: str): +# text = self.read_pdf(file_url) +# text_splitter = RecursiveCharacterTextSplitter( +# chunk_size=4000, +# chunk_overlap=400, +# length_function=len +# ) +# chunks = text_splitter.split_text(text) +# df = pd.DataFrame(chunks, columns=['chunk']) +# yield from df.itertuples(index=False, name=None) +# $$ +# """ +# try: +# session.sql(create_udf_query).collect() +# # st.success("UDF pdf_text_chunker created successfully.") +# except Exception as e: +# print("UDF Already exists!") def make_llm_call(session, system_prompt, prompt, model): @@ -2685,8 +2685,8 @@ def create_starter_sql(session): print("✓ Inserted sample conversation data") elif "sales_metrics" in statement: print("✓ Inserted sample metrics data") - elif "CREATE OR REPLACE WAREHOUSE" in statement.upper(): - print("✓ Created sales_intelligence_wh warehouse") + #elif "CREATE OR REPLACE WAREHOUSE" in statement.upper(): + # print("✓ Created sales_intelligence_wh warehouse") elif "CREATE OR REPLACE CORTEX SEARCH SERVICE" in statement.upper(): print("✓ Created sales_conversation_search_Demo service") elif (