Skip to content

Commit 6213c2d

Browse files
authored
fix(dataset): add support for file cells in datasets with upload and external URL linking capabilities (#3462)
1 parent 5bd93d9 commit 6213c2d

32 files changed

+2265
-19
lines changed
Lines changed: 356 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,356 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Sample application demonstrating how to use the attachment feature in Traceloop SDK.
4+
5+
This example shows:
6+
1. Creating datasets with external URL attachments (YouTube videos, Google Docs)
7+
2. Creating datasets with file uploads (local images, PDFs)
8+
3. Creating datasets with in-memory data attachments
9+
4. Mixed attachment types in a single dataset
10+
"""
11+
12+
import os
13+
import tempfile
14+
15+
from traceloop.sdk import Traceloop
16+
from traceloop.sdk.datasets import (
17+
Attachment,
18+
Datasets,
19+
ExternalAttachment,
20+
FileCellType,
21+
)
22+
from traceloop.sdk.datasets.model import (
23+
ColumnDefinition,
24+
ColumnType,
25+
CreateDatasetRequest,
26+
)
27+
28+
29+
def example_external_attachments():
    """Demonstrate a dataset whose file cells point at externally hosted URLs.

    Initializes the Traceloop SDK, creates a two-row product catalog in which
    every FILE column holds an ``ExternalAttachment``, then prints the URL and
    type values found on each row's demo video cell.
    """
    print("\n=== Example 1: External URL Attachments ===")

    # The SDK is initialized before the Datasets client is constructed.
    Traceloop.init(app_name="attachment-demo")
    client = Datasets()

    # Schema: two plain columns plus two FILE columns for the media links.
    catalog_columns = [
        ColumnDefinition(
            slug="product_name", name="Product Name", type=ColumnType.STRING
        ),
        ColumnDefinition(slug="price", name="Price", type=ColumnType.NUMBER),
        ColumnDefinition(slug="demo_video", name="Demo Video", type=ColumnType.FILE),
        ColumnDefinition(
            slug="user_manual", name="User Manual", type=ColumnType.FILE
        ),
    ]

    widget_row = {
        "product_name": "Smart Widget Pro",
        "price": 299.99,
        "demo_video": ExternalAttachment(
            url="https://www.youtube.com/watch?v=dQw4w9WgXcQ",
            file_type=FileCellType.VIDEO,
            metadata={
                "title": "Smart Widget Pro Demo",
                "duration": "5:32",
                "resolution": "1080p",
            },
        ),
        "user_manual": ExternalAttachment(
            url="https://docs.google.com/document/d/example-manual-id",
            file_type=FileCellType.FILE,
            metadata={"pages": 45, "format": "Google Docs", "version": "2.1"},
        ),
    }
    gadget_row = {
        "product_name": "EcoGadget Plus",
        "price": 199.99,
        "demo_video": ExternalAttachment(
            url="https://vimeo.com/123456789",
            file_type=FileCellType.VIDEO,
            metadata={"title": "EcoGadget Plus Overview", "duration": "3:15"},
        ),
        "user_manual": ExternalAttachment(
            url="https://example.com/manuals/ecogadget-plus.pdf",
            file_type=FileCellType.FILE,
            metadata={"pages": 30, "format": "PDF"},
        ),
    }

    request = CreateDatasetRequest(
        slug="product-catalog-with-media",
        name="Product Catalog with Media",
        description="Product catalog with videos and documentation links",
        columns=catalog_columns,
        rows=[widget_row, gadget_row],
    )

    created = client.create(request)
    print(f"Created dataset: {created.slug}")
    print(f"Total rows: {len(created.rows)}")

    # Report the attachment details stored on each row.
    for entry in created.rows:
        print(f"\nProduct: {entry.values['product_name']}")
        demo = entry.values.get("demo_video")
        if not demo:
            # Nothing to report for rows without a video cell.
            continue
        print(f" Video URL: {demo.get('url')}")
        print(f" Video Type: {demo.get('type')}")
102+
103+
104+
def example_file_uploads():
    """Demonstrate a dataset whose file cells are uploaded from local files.

    Writes two throwaway files (a fake ``.jpg`` and a fake ``.pdf``) to the
    temp directory, references them through ``Attachment(file_path=...)`` in
    a one-row dataset, prints the ``status`` and ``storage`` values found on
    each file cell, and finally removes the temporary files.
    """
    print("\n=== Example 2: File Upload Attachments ===")

    # Stand-ins for real files; a real application would point at existing paths.
    fixtures = (
        (".jpg", b"fake image data for demo"),
        (".pdf", b"fake pdf data for demo"),
    )
    # Each path is recorded as soon as its file exists, so the cleanup below
    # also covers a failure while a later file is being created or written.
    temp_paths = []

    try:
        for suffix, payload in fixtures:
            # delete=False keeps the file on disk after the handle closes; the
            # with-block closes the handle even if write() raises.
            with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as handle:
                temp_paths.append(handle.name)
                handle.write(payload)
        image_path, pdf_path = temp_paths

        datasets = Datasets()

        # Create a dataset with file uploads
        dataset_request = CreateDatasetRequest(
            slug="employee-records-with-photos",
            name="Employee Records with Photos",
            description="Employee database with profile photos and resumes",
            columns=[
                ColumnDefinition(
                    slug="employee_id", name="Employee ID", type=ColumnType.STRING
                ),
                ColumnDefinition(slug="name", name="Full Name", type=ColumnType.STRING),
                ColumnDefinition(
                    slug="profile_photo", name="Profile Photo", type=ColumnType.FILE
                ),
                ColumnDefinition(slug="resume", name="Resume", type=ColumnType.FILE),
            ],
            rows=[
                {
                    "employee_id": "EMP001",
                    "name": "Alice Johnson",
                    "profile_photo": Attachment(
                        file_path=image_path,
                        file_type=FileCellType.IMAGE,
                        metadata={
                            "alt_text": "Alice Johnson profile photo",
                            "photographer": "Company Photo Services",
                            "date_taken": "2024-01-15",
                        },
                    ),
                    "resume": Attachment(
                        file_path=pdf_path,
                        file_type=FileCellType.FILE,
                        content_type="application/pdf",
                        metadata={
                            "version": "3.0",
                            "last_updated": "2024-03-01",
                            "pages": 2,
                        },
                    ),
                },
            ],
        )

        # Create the dataset (per the original example, uploads happen
        # automatically as part of this call).
        dataset = datasets.create(dataset_request)
        print(f"Created dataset: {dataset.slug}")

        # Check upload status
        for row in dataset.rows:
            print(f"\nEmployee: {row.values['name']}")
            photo = row.values.get("profile_photo")
            if photo:
                print(f" Photo Status: {photo.get('status')}")
                print(f" Storage Type: {photo.get('storage')}")

            resume = row.values.get("resume")
            if resume:
                print(f" Resume Status: {resume.get('status')}")
                print(f" Storage Type: {resume.get('storage')}")

    finally:
        # Best-effort cleanup: one failed removal must not skip the others.
        for path in temp_paths:
            try:
                os.unlink(path)
            except OSError as cleanup_error:
                print(f"Warning: could not remove {path}: {cleanup_error}")
184+
185+
186+
def example_in_memory_attachments():
    """Demonstrate a dataset whose file cells are built from in-memory bytes.

    Builds a one-row dataset in which both FILE columns hold an
    ``Attachment`` constructed from a ``bytes`` payload plus an explicit
    filename and content type, creates it, and prints the resulting slug.
    """
    # Header added so this example announces itself like examples 1, 2 and 4.
    print("\n=== Example 3: In-Memory Attachments ===")

    datasets = Datasets()

    # Generate some in-memory data
    # This could be data generated by your application
    csv_data = b"name,score\nAlice,95\nBob,87\nCarol,92"
    json_data = b'{"config": "example", "version": "1.0"}'

    # Create dataset with in-memory attachments
    dataset_request = CreateDatasetRequest(
        slug="analysis-results",
        name="Analysis Results",
        description="Results from data analysis with generated reports",
        columns=[
            ColumnDefinition(
                slug="analysis_id", name="Analysis ID", type=ColumnType.STRING
            ),
            ColumnDefinition(
                slug="dataset_name", name="Dataset Name", type=ColumnType.STRING
            ),
            ColumnDefinition(
                slug="results_csv", name="Results CSV", type=ColumnType.FILE
            ),
            ColumnDefinition(
                slug="config_json", name="Configuration", type=ColumnType.FILE
            ),
        ],
        rows=[
            {
                "analysis_id": "ANA001",
                "dataset_name": "Q4 Sales Analysis",
                "results_csv": Attachment(
                    data=csv_data,
                    filename="q4_results.csv",
                    content_type="text/csv",
                    file_type=FileCellType.FILE,
                    metadata={"rows": 3, "columns": 2, "analysis_date": "2024-01-10"},
                ),
                "config_json": Attachment(
                    data=json_data,
                    filename="analysis_config.json",
                    content_type="application/json",
                    file_type=FileCellType.FILE,
                    metadata={"version": "1.0", "algorithm": "regression"},
                ),
            },
        ],
    )

    dataset = datasets.create(dataset_request)
    print(f"Created dataset: {dataset.slug}")
    print("Attachments uploaded from memory")
239+
240+
241+
def example_mixed_attachments():
    """Demonstrate a dataset that mixes every attachment flavour.

    Creates a three-row dataset whose FILE cells hold, respectively, a local
    file ``Attachment``, an in-memory ``Attachment``, ``ExternalAttachment``
    links, and one explicit ``None``. Prints the ``storage`` value found on
    each row's attachment cell (and the URL when it equals ``"external"``),
    then removes the temporary file.
    """
    print("\n=== Example 4: Mixed Attachment Types ===")

    # Stays None until the temp file exists, so cleanup knows whether to run.
    local_path = None

    try:
        # Create a temporary file for the local attachment. delete=False keeps
        # it on disk after the handle closes; the with-block closes the handle
        # even if write() raises.
        with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as handle:
            local_path = handle.name
            handle.write(b"Sample report content")

        datasets = Datasets()

        # Create dataset with different attachment types
        dataset_request = CreateDatasetRequest(
            slug="project-documentation",
            name="Project Documentation",
            description="Project docs with various attachment types",
            columns=[
                ColumnDefinition(
                    slug="doc_id", name="Document ID", type=ColumnType.STRING
                ),
                ColumnDefinition(slug="title", name="Title", type=ColumnType.STRING),
                ColumnDefinition(
                    slug="attachment", name="Attachment", type=ColumnType.FILE
                ),
                ColumnDefinition(
                    slug="reference_video", name="Reference Video", type=ColumnType.FILE
                ),
            ],
            rows=[
                {
                    "doc_id": "DOC001",
                    "title": "Project Overview",
                    "attachment": Attachment(
                        file_path=local_path,
                        file_type=FileCellType.FILE,
                        metadata={"author": "Team Lead"},
                    ),
                    "reference_video": ExternalAttachment(
                        url="https://www.youtube.com/watch?v=example",
                        file_type=FileCellType.VIDEO,
                        metadata={"relevance": "high"},
                    ),
                },
                {
                    "doc_id": "DOC002",
                    "title": "Technical Specs",
                    "attachment": Attachment(
                        data=b"Technical specification details...",
                        filename="tech_specs.txt",
                        file_type=FileCellType.FILE,
                    ),
                    "reference_video": None,  # No video for this document
                },
                {
                    "doc_id": "DOC003",
                    "title": "External Resources",
                    "attachment": ExternalAttachment(
                        url="https://github.com/example/repo/blob/main/README.md",
                        file_type=FileCellType.FILE,
                        metadata={"type": "markdown"},
                    ),
                    "reference_video": ExternalAttachment(
                        url="https://vimeo.com/example-tutorial",
                        file_type=FileCellType.VIDEO,
                    ),
                },
            ],
        )

        dataset = datasets.create(dataset_request)
        print(f"Created dataset: {dataset.slug}")

        # Show the different storage types
        for row in dataset.rows:
            print(f"\nDocument: {row.values['title']}")
            attachment = row.values.get("attachment")
            if attachment:
                print(f" Attachment Storage: {attachment.get('storage')}")
                if attachment.get("storage") == "external":
                    print(f" URL: {attachment.get('url')}")

    finally:
        # Best-effort cleanup; skipped when the temp file was never created.
        if local_path is not None:
            try:
                os.unlink(local_path)
            except OSError as cleanup_error:
                print(f"Warning: could not remove {local_path}: {cleanup_error}")
325+
326+
327+
def main():
    """Run every attachment example in order and report the overall outcome.

    Any exception raised by an example is caught here, printed, and followed
    by a reminder about the TRACELOOP_API_KEY environment variable.
    """
    banner = "=" * 60
    print(banner)
    print("Traceloop SDK Attachment Feature Examples")
    print(banner)

    # Set your API key
    # os.environ["TRACELOOP_API_KEY"] = "your-api-key-here"

    # Note: These examples use mock data and won't actually upload to S3
    # In production, real files would be uploaded to S3 storage

    try:
        # The examples run in their numbered order; the first failure stops the run.
        examples = (
            example_external_attachments,
            example_file_uploads,
            example_in_memory_attachments,
            example_mixed_attachments,
        )
        for run_example in examples:
            run_example()

        print(f"\n{banner}")
        print("All examples completed successfully!")
        print(banner)

    except Exception as e:
        print(f"\nError: {e}")
        print("Make sure to set TRACELOOP_API_KEY environment variable")


# Run the examples only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

packages/sample-app/sample_app/dataset_example.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
from typing import Optional
88
from datetime import datetime
99
from traceloop.sdk import Traceloop
10-
from traceloop.sdk.dataset import Dataset, ColumnType, Column, Row
10+
from traceloop.sdk.datasets import Dataset, ColumnType, Column, Row
1111
import pandas as pd
1212
import openai
1313

packages/traceloop-sdk/tests/dataset/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)