Skip to content

Commit d8245dc

Browse files
authored
Evaluation output configuration improvements (#1849)
* Evaluation output configuration improvements * remove console.log
1 parent 076c530 commit d8245dc

File tree

52 files changed

+1156
-597
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

52 files changed

+1156
-597
lines changed

apps/gateway/src/routes/api/v3/conversations/annotate/annotate.handler.test.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,16 @@ describe('POST /conversations/:conversationUuid/evaluations/:evaluationUuid/anno
5454

5555
// Default evaluation configuration
5656
const DEFAULT_EVALUATION_CONFIG = {
57+
reverseScale: false,
58+
actualOutput: {
59+
messageSelection: 'last' as const,
60+
parsingFormat: 'string' as const,
61+
},
62+
expectedOutput: {
63+
parsingFormat: 'string' as const,
64+
},
5765
minRating: 1,
5866
maxRating: 5,
59-
reverseScale: false,
6067
}
6168

6269
type TestSetupOptions = {

apps/web/src/app/(private)/projects/[projectId]/versions/[commitUuid]/documents/[documentUuid]/(withTabs)/evaluations/[evaluationUuid]/editor/_components/EvaluationEditor/Playground/EvaluationParams/HistoryLogParams/useSerializedLogs.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,17 @@
1+
import { useCurrentCommit } from '$/app/providers/CommitProvider'
2+
import { useCurrentProject } from '$/app/providers/ProjectProvider'
13
import { useDefaultLogFilterOptions } from '$/hooks/logFilters/useDefaultLogFilterOptions'
24
import useDocumentLogWithPaginationPosition, {
35
LogWithPosition,
46
} from '$/stores/documentLogWithPaginationPosition'
57
import useEvaluatedDocumentLogs from '$/stores/evaluatedDocumentLogs'
68
import useDocumentLogsPagination from '$/stores/useDocumentLogsPagination'
7-
import { useCurrentCommit } from '$/app/providers/CommitProvider'
8-
import { useCurrentProject } from '$/app/providers/ProjectProvider'
9-
import { useCallback, useMemo, useState } from 'react'
109
import {
1110
ActualOutputConfiguration,
1211
EvaluatedDocumentLog,
1312
} from '@latitude-data/core/constants'
1413
import { DocumentVersion } from '@latitude-data/core/schema/models/types/DocumentVersion'
14+
import { useCallback, useMemo, useState } from 'react'
1515

1616
const ONLY_ONE_PAGE = '1'
1717

@@ -23,7 +23,7 @@ export function useSerializedLogs({
2323
logUuid,
2424
}: {
2525
document: DocumentVersion
26-
configuration?: ActualOutputConfiguration
26+
configuration: ActualOutputConfiguration
2727
onHistoryFetched?: OnHistoryFetchedFn
2828
logUuid?: string
2929
}) {

apps/web/src/app/api/projects/[projectId]/documents/[documentUuid]/evaluatedLogs/route.ts

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,30 @@
11
import { authHandler } from '$/middlewares/authHandler'
22
import { errorHandler } from '$/middlewares/errorHandler'
3+
import {
4+
ActualOutputConfiguration,
5+
DocumentLog,
6+
EvaluatedDocumentLog,
7+
} from '@latitude-data/core/constants'
38
import { findLastProviderLogFromDocumentLogUuid } from '@latitude-data/core/data-access/providerLogs'
9+
import {
10+
buildConversation,
11+
formatConversation,
12+
} from '@latitude-data/core/helpers'
413
import { UnprocessableEntityError } from '@latitude-data/core/lib/errors'
514
import {
615
DocumentVersionsRepository,
716
ProviderLogsRepository,
817
} from '@latitude-data/core/repositories'
18+
import { ProviderLog } from '@latitude-data/core/schema/models/types/ProviderLog'
19+
import { Workspace } from '@latitude-data/core/schema/models/types/Workspace'
20+
import { ProviderLogDto } from '@latitude-data/core/schema/types'
921
import { computeDocumentLogs } from '@latitude-data/core/services/documentLogs/computeDocumentLogs'
1022
import { parseApiDocumentLogParams } from '@latitude-data/core/services/documentLogs/logsFilterUtils/parseApiLogFilterParams'
1123
import { serializeAggregatedProviderLog } from '@latitude-data/core/services/documentLogs/serialize'
12-
import { buildProviderLogResponse } from '@latitude-data/core/services/providerLogs/buildResponse'
1324
import { extractActualOutput } from '@latitude-data/core/services/evaluationsV2/outputs/extract'
25+
import { buildProviderLogResponse } from '@latitude-data/core/services/providerLogs/buildResponse'
1426
import { NextRequest, NextResponse } from 'next/server'
15-
import {
16-
ActualOutputConfiguration,
17-
DocumentLog,
18-
EvaluatedDocumentLog,
19-
} from '@latitude-data/core/constants'
20-
import {
21-
buildConversation,
22-
formatConversation,
23-
} from '@latitude-data/core/helpers'
24-
import { ProviderLogDto } from '@latitude-data/core/schema/types'
2527

26-
import { Workspace } from '@latitude-data/core/schema/models/types/Workspace'
27-
import { ProviderLog } from '@latitude-data/core/schema/models/types/ProviderLog'
2828
export const GET = errorHandler(
2929
authHandler(
3030
async (
@@ -112,7 +112,7 @@ async function serializeEvaluatedDocumentLog({
112112
}: {
113113
documentLog: DocumentLog
114114
providerLogs: ProviderLog[]
115-
configuration?: ActualOutputConfiguration
115+
configuration: ActualOutputConfiguration
116116
}): Promise<EvaluatedDocumentLog> {
117117
const providerLog = (await findLastProviderLogFromDocumentLogUuid(
118118
documentLog.uuid,

apps/web/src/components/evaluations/ConfigurationForm.tsx

Lines changed: 48 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,16 @@
11
import { useSerializedLogs } from '$/app/(private)/projects/[projectId]/versions/[commitUuid]/documents/[documentUuid]/(withTabs)/evaluations/[evaluationUuid]/editor/_components/EvaluationEditor/Playground/EvaluationParams/HistoryLogParams/useSerializedLogs'
2+
import { useCurrentCommit } from '$/app/providers/CommitProvider'
23
import { useCurrentDocument } from '$/app/providers/DocumentProvider'
4+
import { useCurrentProject } from '$/app/providers/ProjectProvider'
5+
import { MessageList, MessageListSkeleton } from '$/components/ChatWrapper'
36
import { ROUTES } from '$/services/routes'
7+
import {
8+
ACCESSIBLE_OUTPUT_FORMATS,
9+
ActualOutputConfiguration,
10+
EvaluationMetric,
11+
EvaluationType,
12+
baseEvaluationConfiguration,
13+
} from '@latitude-data/core/constants'
414
import { Alert } from '@latitude-data/web-ui/atoms/Alert'
515
import { Button } from '@latitude-data/web-ui/atoms/Button'
616
import { FormFieldGroup } from '@latitude-data/web-ui/atoms/FormFieldGroup'
@@ -9,44 +19,36 @@ import { LineSeparator } from '@latitude-data/web-ui/atoms/LineSeparator'
919
import { Select } from '@latitude-data/web-ui/atoms/Select'
1020
import { Skeleton } from '@latitude-data/web-ui/atoms/Skeleton'
1121
import { Text } from '@latitude-data/web-ui/atoms/Text'
12-
import { MessageList, MessageListSkeleton } from '$/components/ChatWrapper'
1322
import { SelectableSwitch } from '@latitude-data/web-ui/molecules/SelectableSwitch'
1423
import { ClickToCopyUuid } from '@latitude-data/web-ui/organisms/ClickToCopyUuid'
15-
import { useCurrentCommit } from '$/app/providers/CommitProvider'
16-
import { useCurrentProject } from '$/app/providers/ProjectProvider'
1724
import { useRouter } from 'next/navigation'
1825
import { useEffect, useMemo, useState } from 'react'
1926
import { useDebounce } from 'use-debounce'
2027
import { ConfigurationFormProps, EVALUATION_SPECIFICATIONS } from './index'
21-
import {
22-
ACCESSIBLE_OUTPUT_FORMATS,
23-
ActualOutputConfiguration,
24-
EvaluationMetric,
25-
EvaluationType,
26-
baseEvaluationConfiguration,
27-
} from '@latitude-data/core/constants'
2828

29-
const MESSAGE_SELECTION_OPTIONS = baseEvaluationConfiguration.shape.actualOutput
30-
.unwrap()
31-
.shape.messageSelection.options.map((option) => ({
32-
label: option.toUpperCase().split('_').join(' '),
33-
value: option,
34-
}))
29+
const MESSAGE_SELECTION_OPTIONS =
30+
baseEvaluationConfiguration.shape.actualOutput.shape.messageSelection.options.map(
31+
(option) => ({
32+
label: option.toUpperCase().split('_').join(' '),
33+
value: option,
34+
}),
35+
)
3536

36-
const CONTENT_FILTER_OPTIONS = baseEvaluationConfiguration.shape.actualOutput
37-
.unwrap()
38-
.shape.contentFilter.unwrap()
39-
.options.map((option) => ({
40-
label: option.toUpperCase().split('_').join(' '),
41-
value: option,
42-
}))
37+
const CONTENT_FILTER_OPTIONS =
38+
baseEvaluationConfiguration.shape.actualOutput.shape.contentFilter
39+
.unwrap()
40+
.options.map((option) => ({
41+
label: option.toUpperCase().split('_').join(' '),
42+
value: option,
43+
}))
4344

44-
const PARSING_FORMAT_OPTIONS = baseEvaluationConfiguration.shape.actualOutput
45-
.unwrap()
46-
.shape.parsingFormat.options.map((option) => ({
47-
label: option.toUpperCase().split('_').join(' '),
48-
value: option,
49-
}))
45+
const PARSING_FORMAT_OPTIONS =
46+
baseEvaluationConfiguration.shape.actualOutput.shape.parsingFormat.options.map(
47+
(option) => ({
48+
label: option.toUpperCase().split('_').join(' '),
49+
value: option,
50+
}),
51+
)
5052

5153
export function ConfigurationSimpleForm<
5254
T extends EvaluationType,
@@ -102,7 +104,7 @@ export function ConfigurationAdvancedForm<
102104
// eslint-disable-next-line react-hooks/exhaustive-deps
103105
}, [formatIsAccessible])
104106

105-
const [testConfiguration] = useDebounce(configuration.actualOutput, 333)
107+
const [testConfiguration] = useDebounce(configuration.actualOutput, 750)
106108
const [showTest, setShowTest] = useState(false)
107109

108110
const typeSpecification = EVALUATION_SPECIFICATIONS[type]
@@ -150,7 +152,7 @@ export function ConfigurationAdvancedForm<
150152
}}
151153
onClick={(event) => {
152154
event.preventDefault()
153-
setShowTest(!showTest)
155+
setShowTest(!showTest && !!configuration.actualOutput)
154156
}}
155157
>
156158
{showTest ? 'Close' : 'Test'}
@@ -209,6 +211,19 @@ export function ConfigurationAdvancedForm<
209211
label='Parsing format'
210212
description='How to parse the assistant messages'
211213
layout='horizontal'
214+
tooltip={
215+
formatIsAccessible ? (
216+
<Text.H6 color='background'>
217+
Use a field accessor to extract a specific field from the output
218+
using dot notation.
219+
<br />
220+
<br />
221+
- Access a field: arguments, arguments.options (nested)
222+
<br />- Access a list: [0] (first), [-1] (last)
223+
<br />- Combine both: [0].arguments.options[-1]
224+
</Text.H6>
225+
) : undefined
226+
}
212227
>
213228
<Select
214229
value={configuration.actualOutput?.parsingFormat ?? 'string'}
@@ -250,7 +265,7 @@ export function ConfigurationAdvancedForm<
250265
)}
251266
</FormFieldGroup>
252267
</FormFieldGroup>
253-
{showTest && (
268+
{showTest && !!configuration.actualOutput && (
254269
<div className='w-full flex flex-col gap-2 border-t-2 border-dashed border-border pt-4'>
255270
<ActualOutputTest configuration={testConfiguration} />
256271
</div>
@@ -262,7 +277,7 @@ export function ConfigurationAdvancedForm<
262277
function ActualOutputTest({
263278
configuration,
264279
}: {
265-
configuration?: ActualOutputConfiguration
280+
configuration: ActualOutputConfiguration
266281
}) {
267282
const { project } = useCurrentProject()
268283
const { commit } = useCurrentCommit()

apps/web/src/components/evaluations/ResultPanel.tsx

Lines changed: 22 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
'use client'
22

33
import { DATASET_TABLE_PAGE_SIZE } from '$/app/(private)/datasets/_components/DatasetsTable'
4-
import { MetadataInfoTabs } from '$/app/(private)/projects/[projectId]/versions/[commitUuid]/documents/[documentUuid]/_components/MetadataInfoTabs'
54
import { DocumentLogParameters } from '$/app/(private)/projects/[projectId]/versions/[commitUuid]/documents/[documentUuid]/(withTabs)/logs/_components/DocumentLogs/DocumentLogInfo/Metadata'
5+
import { MetadataInfoTabs } from '$/app/(private)/projects/[projectId]/versions/[commitUuid]/documents/[documentUuid]/_components/MetadataInfoTabs'
66
import { useCurrentDocument } from '$/app/providers/DocumentProvider'
7+
import {
8+
useCurrentProject,
9+
type IProjectContextType,
10+
} from '$/app/providers/ProjectProvider'
711
import { MetadataItem } from '$/components/MetadataItem'
812
import { useDatasetRole } from '$/hooks/useDatasetRoles'
913
import { useStickyNested } from '$/hooks/useStickyNested'
@@ -12,7 +16,18 @@ import useDatasetRows from '$/stores/datasetRows'
1216
import useDatasetRowCount from '$/stores/datasetRows/count'
1317
import useDatasetRowPosition from '$/stores/datasetRows/position'
1418
import useDocumentLog from '$/stores/documentLogWithMetadata'
19+
import {
20+
ACCESSIBLE_OUTPUT_FORMATS,
21+
DocumentLog,
22+
EvaluationMetric,
23+
EvaluationResultV2,
24+
EvaluationType,
25+
} from '@latitude-data/core/constants'
1526
import { buildPagination } from '@latitude-data/core/lib/pagination/buildPagination'
27+
import { Commit } from '@latitude-data/core/schema/models/types/Commit'
28+
import { Dataset } from '@latitude-data/core/schema/models/types/Dataset'
29+
import { DatasetRow } from '@latitude-data/core/schema/models/types/DatasetRow'
30+
import { DocumentVersion } from '@latitude-data/core/schema/models/types/DocumentVersion'
1631
import { Alert } from '@latitude-data/web-ui/atoms/Alert'
1732
import { Button } from '@latitude-data/web-ui/atoms/Button'
1833
import { Modal } from '@latitude-data/web-ui/atoms/Modal'
@@ -21,38 +36,12 @@ import { Text } from '@latitude-data/web-ui/atoms/Text'
2136
import { TextArea } from '@latitude-data/web-ui/atoms/TextArea'
2237
import { ClickToCopy } from '@latitude-data/web-ui/molecules/ClickToCopy'
2338
import { TableSkeleton } from '@latitude-data/web-ui/molecules/TableSkeleton'
24-
import {
25-
useCurrentProject,
26-
type IProjectContextType,
27-
} from '$/app/providers/ProjectProvider'
2839
import { format } from 'date-fns'
2940
import dynamic from 'next/dynamic'
3041
import Link from 'next/link'
3142
import { useEffect, useMemo, useRef, useState } from 'react'
3243
import { EVALUATION_SPECIFICATIONS, ResultPanelProps } from './index'
3344
import ResultBadge from './ResultBadge'
34-
import {
35-
ACCESSIBLE_OUTPUT_FORMATS,
36-
DocumentLog,
37-
EvaluationMetric,
38-
EvaluationResultV2,
39-
EvaluationType,
40-
baseEvaluationConfiguration,
41-
} from '@latitude-data/core/constants'
42-
import { Dataset } from '@latitude-data/core/schema/models/types/Dataset'
43-
44-
import { Commit } from '@latitude-data/core/schema/models/types/Commit'
45-
import { DocumentVersion } from '@latitude-data/core/schema/models/types/DocumentVersion'
46-
import { DatasetRow } from '@latitude-data/core/schema/models/types/DatasetRow'
47-
const PARSING_FORMAT_LABELS = baseEvaluationConfiguration.shape.actualOutput
48-
.unwrap()
49-
.shape.parsingFormat.options.reduce(
50-
(acc, option) => {
51-
acc[option] = option.toUpperCase().split('_').join(' ')
52-
return acc
53-
},
54-
{} as Record<string, string>,
55-
)
5645

5746
const DataGrid = dynamic(
5847
() =>
@@ -198,14 +187,13 @@ function ResultPanelMetadata<
198187
) ? (
199188
<Text.H6 color='foregroundMuted' noWrap ellipsis>
200189
Parsed from{' '}
201-
{
202-
PARSING_FORMAT_LABELS[
203-
result.metadata!.configuration.actualOutput!.parsingFormat
204-
]
205-
}
190+
{result
191+
.metadata!.configuration.actualOutput.parsingFormat.toUpperCase()
192+
.split('_')
193+
.join(' ')}
206194
{!!result.metadata!.configuration.actualOutput
207-
?.fieldAccessor &&
208-
` using field '${result.metadata!.configuration.actualOutput!.fieldAccessor}'`}
195+
.fieldAccessor &&
196+
` using field '${result.metadata!.configuration.actualOutput.fieldAccessor}'`}
209197
</Text.H6>
210198
) : (
211199
<div />

apps/web/src/hooks/playgroundChat/useProviderEventHandler.ts

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -402,10 +402,7 @@ export function useProviderEventHandler({
402402
handleReasoningDelta(data)
403403
break
404404
case 'reasoning-start':
405-
handleReasoningDelta({
406-
type: 'reasoning-delta',
407-
text: '',
408-
})
405+
handleReasoningDelta({ type: 'reasoning-delta', text: '' })
409406
break
410407
case 'file':
411408
handleFile(data)

apps/web/src/stores/evaluatedDocumentLogs.ts

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
import useFetcher from '$/hooks/useFetcher'
22
import { ROUTES } from '$/services/routes'
3-
import { useMemo, useState } from 'react'
4-
import useSWR, { SWRConfiguration } from 'swr'
53
import {
64
ActualOutputConfiguration,
75
DocumentLogFilterOptions,
86
EvaluatedDocumentLog,
97
} from '@latitude-data/core/constants'
8+
import { useMemo, useState } from 'react'
9+
import useSWR, { SWRConfiguration } from 'swr'
1010

1111
const EMPTY_ARRAY: [] = []
1212
export default function useEvaluatedDocumentLogs(
@@ -24,7 +24,7 @@ export default function useEvaluatedDocumentLogs(
2424
filterOptions: DocumentLogFilterOptions
2525
page: string | null | undefined
2626
pageSize: string | null
27-
configuration?: ActualOutputConfiguration
27+
configuration: ActualOutputConfiguration
2828
onFetched?: (logs: EvaluatedDocumentLog[]) => void
2929
},
3030
{ fallbackData }: SWRConfiguration = {},
@@ -40,9 +40,7 @@ export default function useEvaluatedDocumentLogs(
4040
page: page ? Number(page) : undefined,
4141
pageSize: pageSize ? Number(pageSize) : undefined,
4242
filterOptions,
43-
configuration: configuration
44-
? JSON.stringify(configuration)
45-
: undefined,
43+
configuration: JSON.stringify(configuration),
4644
})
4745
: undefined,
4846
{

0 commit comments

Comments
 (0)