Skip to content

Commit 1157ea9

Browse files
authored
Fix datasource add and delete issues (#147)
1 parent 6da1048 commit 1157ea9

File tree

3 files changed

+14
-24
lines changed

3 files changed

+14
-24
lines changed

llmstack/apps/tasks.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,10 @@ def delete_data_entry_task(
3030
datasource_entry_items = datasource_entry_handler.delete_entry(
3131
entry_data.config,
3232
)
33-
logger.debug(
34-
f"Deleted {len(datasource_entry_items)} items from weaviate for data_source_entry: {str(entry_data.uuid)}",
35-
)
33+
if datasource_entry_items:
34+
logger.debug(
35+
f"Deleted {len(datasource_entry_items)} items from weaviate for data_source_entry: {str(entry_data.uuid)}",
36+
)
3637
entry_data.delete()
3738
except weaviate.exceptions.UnexpectedStatusCodeException:
3839
logger.exception("Error deleting data source entry from weaviate")

llmstack/datasources/handlers/website/url.py

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from llmstack.common.blocks.data.store.vectorstore import Document
88
from llmstack.common.utils.splitter import SpacyTextSplitter
99
from llmstack.common.utils.text_extract import ExtraParams, extract_text_from_url
10-
from llmstack.common.utils.utils import extract_urls_from_sitemap
1110
from llmstack.datasources.handlers.datasource_processor import (
1211
WEAVIATE_SCHEMA,
1312
DataSourceEntryItem,
@@ -116,25 +115,13 @@ def get_url_data(
116115

117116
def validate_and_process(self, data: dict) -> List[DataSourceEntryItem]:
118117
entry = URLSchema(**data)
119-
sitemap_urls = []
120118
# Split urls by newline and then by comma
121119
urls = entry.urls.split("\n")
122120
urls = [url.strip().rstrip() for url_list in [url.split(",") for url in urls] for url in url_list]
123121
# Filter out empty urls
124122
urls = list(set(list(filter(lambda url: url != "", urls))))
125-
sitemap_xmls = list(
126-
filter(lambda url: url.endswith(".xml"), urls),
127-
)
128123
# Filter out sitemap.xml
129124
urls = list(filter(lambda url: not url.endswith(".xml"), urls))
130-
# If sitemap.xml is present, scrape the site to extract urls
131-
try:
132-
for sitemap_xml in sitemap_xmls:
133-
sitmap_xml_urls = extract_urls_from_sitemap(sitemap_xml)
134-
for sitmap_xml_url in sitmap_xml_urls:
135-
sitemap_urls.append(sitmap_xml_url)
136-
except BaseException:
137-
logger.exception("Error in extracting urls from sitemap")
138125

139126
return list(
140127
map(
@@ -145,7 +132,7 @@ def validate_and_process(self, data: dict) -> List[DataSourceEntryItem]:
145132
"connection_id": entry.connection_id,
146133
},
147134
),
148-
urls + sitemap_urls,
135+
urls,
149136
),
150137
)
151138

llmstack/jobs/models.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -758,6 +758,15 @@ def save(self, *args, **kwargs):
758758
on_failure = kwargs.pop("on_failure", None)
759759
job_meta = kwargs.pop("job_meta", None)
760760

761+
update_fields = kwargs.get("update_fields", None)
762+
if update_fields:
763+
kwargs["update_fields"] = set(
764+
update_fields,
765+
).union({"updated_at"})
766+
767+
super(AdhocJob, self).save(*args, **kwargs)
768+
769+
if schedule_job:
761770
job = self.rqueue.enqueue(
762771
func,
763772
args=func_args,
@@ -770,13 +779,6 @@ def save(self, *args, **kwargs):
770779
)
771780
self.job_id = job.id
772781

773-
update_fields = kwargs.get("update_fields", None)
774-
if update_fields:
775-
kwargs["update_fields"] = set(
776-
update_fields,
777-
).union({"updated_at"})
778-
super(AdhocJob, self).save(*args, **kwargs)
779-
780782
class Meta:
781783
verbose_name = "Adhoc Job"
782784
verbose_name_plural = "Adhoc Jobs"

0 commit comments

Comments (0)