28 changes: 14 additions & 14 deletions scripts/agent.sql
@@ -1,20 +1,20 @@
use role accountadmin;
SET my_user = CURRENT_USER();

-CREATE OR REPLACE WAREHOUSE sales_intelligence_wh
-WITH
-WAREHOUSE_SIZE = 'SMALL'
-AUTO_SUSPEND = 3600
-AUTO_RESUME = TRUE
-INITIALLY_SUSPENDED = FALSE
-MIN_CLUSTER_COUNT = 1
-MAX_CLUSTER_COUNT = 1
-SCALING_POLICY = 'STANDARD'
-COMMENT = 'Sales intelligence warehouse with 1-hour auto-suspend policy'; -- warehouse creation
-
-
-USE DATABASE SNOWFLAKE_AI_TOOLKIT;
-USE SCHEMA DEMO;
+--CREATE OR REPLACE WAREHOUSE sales_intelligence_wh
+--WITH
+-- WAREHOUSE_SIZE = 'SMALL'
+-- AUTO_SUSPEND = 3600
+-- AUTO_RESUME = TRUE
+-- INITIALLY_SUSPENDED = FALSE
+-- MIN_CLUSTER_COUNT = 1
+-- MAX_CLUSTER_COUNT = 1
+-- SCALING_POLICY = 'STANDARD'
+--COMMENT = 'Sales intelligence warehouse with 1-hour auto-suspend policy'; -- warehouse creation
+
+
+USE DATABASE COMMON_DB;
+USE SCHEMA SNOWFLAKE_AI_TOOLKIT;

-- Create tables for sales data
CREATE TABLE sales_conversations (
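With the warehouse creation commented out in this script, anything that runs agent.sql now assumes sales_intelligence_wh already exists. A minimal pre-flight check along those lines, as a sketch only (it assumes an already-open Snowpark `session`; the warehouse name is taken from the block above):

```python
# Sketch: fail fast if the warehouse referenced by agent.sql is missing.
# Assumes an open snowflake.snowpark.Session named `session`.
rows = session.sql("SHOW WAREHOUSES LIKE 'sales_intelligence_wh'").collect()
if not rows:
    raise RuntimeError(
        "Warehouse sales_intelligence_wh not found; create it first "
        "(e.g. as ACCOUNTADMIN) or re-enable the commented CREATE WAREHOUSE block."
    )
```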
9 changes: 8 additions & 1 deletion snowflake.yml
@@ -39,7 +39,14 @@ entities:
- src/display_chat.py
- src/playground_settings.py
- environment.yml
-- data/
+- data/fine-tune/
+- data/images/
+- data/rag/contracts/
+- data/rag/repair_manuals/
+- data/samples/emails.csv
+- data/samples/solution_center_articles.csv
+- data/samples/audio/
+- data/search/
- scripts/

# audio_to_text:
2 changes: 1 addition & 1 deletion src/search.py
@@ -29,7 +29,7 @@ def display_search(session):
# st.write(res)


warehouse = config["warehouse"]
warehouse = config["snowflake_connection"]["warehouse"]

if create_or_use == "Create":

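The corrected lookup assumes the loaded config nests connection settings under a `snowflake_connection` key. A minimal sketch of that assumed shape (keys and values here are illustrative placeholders, not copied from the project's config file):

```python
# Illustrative config shape assumed by the fix; values are placeholders.
config = {
    "snowflake_connection": {
        "account": "my_account",
        "warehouse": "sales_intelligence_wh",
    },
}

# The old config["warehouse"] raises KeyError with this nesting;
# the updated line reads the warehouse from the nested connection block.
warehouse = config["snowflake_connection"]["warehouse"]
print(warehouse)  # sales_intelligence_wh
```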
132 changes: 66 additions & 66 deletions src/utils.py
@@ -967,7 +967,7 @@ def run_setup_silently(session: Session, config: dict):
create_demo_database_and_stage_if_not_exists(session)
create_stages_tables_for_demo(session)
snowflake_config = get_snowflake_config()
-setup_pdf_text_chunker_demo(
+setup_pdf_text_chunker(
session, snowflake_config.get("database"), snowflake_config.get("schema")
)
create_search_and_rag_for_demo(session)
@@ -1508,7 +1508,7 @@ def show_toast_message(message, duration=3, toast_type="info", position="top-rig
toast_container.empty()


-def setup_pdf_text_chunker(session):
+def setup_pdf_text_chunker(session, db, schema):
"""
Sets up the pdf_text_chunker UDF in the current database and schema.

@@ -1520,7 +1520,7 @@
"""
# Check if UDF already exists
try:
udf_check_query = "SHOW USER FUNCTIONS LIKE 'pdf_text_chunker'"
udf_check_query = f"SHOW USER FUNCTIONS LIKE 'pdf_text_chunker' IN {db}.{schema}"
existing_udfs = session.sql(udf_check_query).collect()
if existing_udfs:
# st.info("UDF pdf_text_chunker already exists. Skipping creation.")
@@ -1530,8 +1530,8 @@
return

# Create UDF if it doesn't exist
create_udf_query = """
CREATE OR REPLACE FUNCTION pdf_text_chunker(file_url STRING)
create_udf_query = f"""
CREATE OR REPLACE FUNCTION {db}.{schema}.pdf_text_chunker(file_url STRING)
RETURNS TABLE (chunk VARCHAR)
LANGUAGE PYTHON
RUNTIME_VERSION = '3.9'
@@ -1576,65 +1576,65 @@ def process(self, file_url: str):
except Exception as e:
st.error(f"Error creating UDF: {e}")


def setup_pdf_text_chunker_demo(session, db, schema):
"""
Sets up the pdf_text_chunker UDF in the current database and schema.

Args:
session: Snowflake session object

Note:
Creates a Python UDF that can process PDF files and split them into text chunks
"""

# Create UDF if it doesn't exist
# TODO: Have to make schema dynamic
create_udf_query = """
CREATE OR REPLACE FUNCTION snowflake_ai_toolkit.public.pdf_text_chunker(file_url STRING)
RETURNS TABLE (chunk VARCHAR)
LANGUAGE PYTHON
RUNTIME_VERSION = '3.9'
HANDLER = 'pdf_text_chunker'
PACKAGES = ('snowflake-snowpark-python', 'PyPDF2', 'langchain')
AS
$$
import PyPDF2
import io
import pandas as pd
from snowflake.snowpark.files import SnowflakeFile
from langchain.text_splitter import RecursiveCharacterTextSplitter

class pdf_text_chunker:
def read_pdf(self, file_url: str) -> str:
with SnowflakeFile.open(file_url, 'rb') as f:
buffer = io.BytesIO(f.readall())
reader = PyPDF2.PdfReader(buffer)
text = ""
for page in reader.pages:
try:
text += page.extract_text().replace('\\n', ' ').replace('\\0', ' ')
except:
text = "Unable to Extract"
return text

def process(self, file_url: str):
text = self.read_pdf(file_url)
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=4000,
chunk_overlap=400,
length_function=len
)
chunks = text_splitter.split_text(text)
df = pd.DataFrame(chunks, columns=['chunk'])
yield from df.itertuples(index=False, name=None)
$$
"""
try:
session.sql(create_udf_query).collect()
# st.success("UDF pdf_text_chunker created successfully.")
except Exception as e:
print("UDF Already exists!")
#DEPRECATED
#def setup_pdf_text_chunker_demo(session, db, schema):
# """
# Sets up the pdf_text_chunker UDF in the current database and schema.
#
# Args:
# session: Snowflake session object
#
# Note:
# Creates a Python UDF that can process PDF files and split them into text chunks
# """
#
# Create UDF if it doesn't exist
# TODO: Have to make schema dynamic
# create_udf_query = """
# CREATE OR REPLACE FUNCTION snowflake_ai_toolkit.public.pdf_text_chunker(file_url STRING)
# RETURNS TABLE (chunk VARCHAR)
# LANGUAGE PYTHON
# RUNTIME_VERSION = '3.9'
# HANDLER = 'pdf_text_chunker'
# PACKAGES = ('snowflake-snowpark-python', 'PyPDF2', 'langchain')
# AS
# $$
# import PyPDF2
# import io
# import pandas as pd
# from snowflake.snowpark.files import SnowflakeFile
# from langchain.text_splitter import RecursiveCharacterTextSplitter

# class pdf_text_chunker:
# def read_pdf(self, file_url: str) -> str:
# with SnowflakeFile.open(file_url, 'rb') as f:
# buffer = io.BytesIO(f.readall())
# reader = PyPDF2.PdfReader(buffer)
# text = ""
# for page in reader.pages:
# try:
# text += page.extract_text().replace('\\n', ' ').replace('\\0', ' ')
# except:
# text = "Unable to Extract"
# return text
#
# def process(self, file_url: str):
# text = self.read_pdf(file_url)
# text_splitter = RecursiveCharacterTextSplitter(
# chunk_size=4000,
# chunk_overlap=400,
# length_function=len
# )
# chunks = text_splitter.split_text(text)
# df = pd.DataFrame(chunks, columns=['chunk'])
# yield from df.itertuples(index=False, name=None)
# $$
# """
# try:
# session.sql(create_udf_query).collect()
# # st.success("UDF pdf_text_chunker created successfully.")
# except Exception as e:
# print("UDF Already exists!")


def make_llm_call(session, system_prompt, prompt, model):
@@ -2685,8 +2685,8 @@ def create_starter_sql(session):
print("✓ Inserted sample conversation data")
elif "sales_metrics" in statement:
print("✓ Inserted sample metrics data")
elif "CREATE OR REPLACE WAREHOUSE" in statement.upper():
print("✓ Created sales_intelligence_wh warehouse")
#elif "CREATE OR REPLACE WAREHOUSE" in statement.upper():
# print("✓ Created sales_intelligence_wh warehouse")
elif "CREATE OR REPLACE CORTEX SEARCH SERVICE" in statement.upper():
print("✓ Created sales_conversation_search_Demo service")
elif (
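As a sanity check on the fully qualified statements introduced above, this sketch shows what the new f-strings expand to when db and schema point at the database and schema used in scripts/agent.sql (the concrete values are assumptions for illustration; in the app they come from get_snowflake_config()):

```python
# Assumed values for illustration; the app passes them in from get_snowflake_config().
db, schema = "COMMON_DB", "SNOWFLAKE_AI_TOOLKIT"

udf_check_query = f"SHOW USER FUNCTIONS LIKE 'pdf_text_chunker' IN {db}.{schema}"
create_udf_header = f"CREATE OR REPLACE FUNCTION {db}.{schema}.pdf_text_chunker(file_url STRING)"

print(udf_check_query)
# SHOW USER FUNCTIONS LIKE 'pdf_text_chunker' IN COMMON_DB.SNOWFLAKE_AI_TOOLKIT
print(create_udf_header)
# CREATE OR REPLACE FUNCTION COMMON_DB.SNOWFLAKE_AI_TOOLKIT.pdf_text_chunker(file_url STRING)
```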