|
'use client' |
|
import { useTranslation } from 'react-i18next' |
|
import { formatFileSize, formatNumber, formatTime } from '@/utils/format' |
|
import type { DocType } from '@/models/datasets' |
|
import useTimestamp from '@/hooks/use-timestamp' |
|
|
|
/** Input control kinds a metadata sub-field can declare. */
export type inputType = 'input' | 'select' | 'textarea'

/** Keys of the metadata map: every document type plus two extra sections. */
export type metadataType = DocType | 'originInfo' | 'technicalParameters'

/** Configuration of a single sub-field inside a metadata section. */
type MetadataSubField = {
  label: string
  inputType?: inputType
  field?: string
  // Optional custom renderer; receives the raw value and an optional total.
  render?: (value: any, total?: number) => React.ReactNode | string
}

/** Configuration of one metadata section and its sub-fields. */
type MetadataSection = {
  text: string
  allowEdit?: boolean
  icon?: React.ReactNode
  iconName?: string
  subFieldsMap: Record<string, MetadataSubField>
}

/** One section configuration per metadata type. */
type MetadataMap = Record<metadataType, MetadataSection>
|
|
|
const fieldPrefix = 'datasetDocuments.metadata.field'

/**
 * Builds the display configuration for every metadata section.
 *
 * Each section provides a translated `text`, optional `iconName` / `allowEdit`
 * flags and a `subFieldsMap` whose entries carry a translated `label`, an
 * optional `inputType`, and an optional `render` function that formats the
 * raw value for display.
 *
 * @returns A map with one entry per metadata type.
 */
export const useMetadataMap = (): MetadataMap => {
  const { t } = useTranslation()
  const { formatTime: formatTimestamp } = useTimestamp()

  return {
    book: {
      text: t('datasetDocuments.metadata.type.book'),
      iconName: 'bookOpen',
      subFieldsMap: {
        title: { label: t(`${fieldPrefix}.book.title`) },
        language: {
          label: t(`${fieldPrefix}.book.language`),
          inputType: 'select',
        },
        author: { label: t(`${fieldPrefix}.book.author`) },
        publisher: { label: t(`${fieldPrefix}.book.publisher`) },
        publication_date: { label: t(`${fieldPrefix}.book.publicationDate`) },
        isbn: { label: t(`${fieldPrefix}.book.ISBN`) },
        category: {
          label: t(`${fieldPrefix}.book.category`),
          inputType: 'select',
        },
      },
    },
    web_page: {
      text: t('datasetDocuments.metadata.type.webPage'),
      iconName: 'globe',
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.webPage.title`) },
        'url': { label: t(`${fieldPrefix}.webPage.url`) },
        'language': {
          label: t(`${fieldPrefix}.webPage.language`),
          inputType: 'select',
        },
        'author/publisher': { label: t(`${fieldPrefix}.webPage.authorPublisher`) },
        'publish_date': { label: t(`${fieldPrefix}.webPage.publishDate`) },
        'topics/keywords': { label: t(`${fieldPrefix}.webPage.topicsKeywords`) },
        'description': { label: t(`${fieldPrefix}.webPage.description`) },
      },
    },
    paper: {
      text: t('datasetDocuments.metadata.type.paper'),
      iconName: 'graduationHat',
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.paper.title`) },
        'language': {
          label: t(`${fieldPrefix}.paper.language`),
          inputType: 'select',
        },
        'author': { label: t(`${fieldPrefix}.paper.author`) },
        'publish_date': { label: t(`${fieldPrefix}.paper.publishDate`) },
        'journal/conference_name': {
          label: t(`${fieldPrefix}.paper.journalConferenceName`),
        },
        'volume/issue/page_numbers': { label: t(`${fieldPrefix}.paper.volumeIssuePage`) },
        'doi': { label: t(`${fieldPrefix}.paper.DOI`) },
        'topics/keywords': { label: t(`${fieldPrefix}.paper.topicsKeywords`) },
        'abstract': {
          label: t(`${fieldPrefix}.paper.abstract`),
          inputType: 'textarea',
        },
      },
    },
    social_media_post: {
      text: t('datasetDocuments.metadata.type.socialMediaPost'),
      iconName: 'atSign',
      subFieldsMap: {
        'platform': { label: t(`${fieldPrefix}.socialMediaPost.platform`) },
        'author/username': {
          label: t(`${fieldPrefix}.socialMediaPost.authorUsername`),
        },
        'publish_date': { label: t(`${fieldPrefix}.socialMediaPost.publishDate`) },
        'post_url': { label: t(`${fieldPrefix}.socialMediaPost.postURL`) },
        'topics/tags': { label: t(`${fieldPrefix}.socialMediaPost.topicsTags`) },
      },
    },
    personal_document: {
      text: t('datasetDocuments.metadata.type.personalDocument'),
      iconName: 'file',
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.personalDocument.title`) },
        'author': { label: t(`${fieldPrefix}.personalDocument.author`) },
        'creation_date': {
          label: t(`${fieldPrefix}.personalDocument.creationDate`),
        },
        'last_modified_date': {
          label: t(`${fieldPrefix}.personalDocument.lastModifiedDate`),
        },
        'document_type': {
          label: t(`${fieldPrefix}.personalDocument.documentType`),
          inputType: 'select',
        },
        'tags/category': {
          label: t(`${fieldPrefix}.personalDocument.tagsCategory`),
        },
      },
    },
    business_document: {
      text: t('datasetDocuments.metadata.type.businessDocument'),
      iconName: 'briefcase',
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.businessDocument.title`) },
        'author': { label: t(`${fieldPrefix}.businessDocument.author`) },
        'creation_date': {
          label: t(`${fieldPrefix}.businessDocument.creationDate`),
        },
        'last_modified_date': {
          label: t(`${fieldPrefix}.businessDocument.lastModifiedDate`),
        },
        'document_type': {
          label: t(`${fieldPrefix}.businessDocument.documentType`),
          inputType: 'select',
        },
        'department/team': {
          label: t(`${fieldPrefix}.businessDocument.departmentTeam`),
        },
      },
    },
    im_chat_log: {
      text: t('datasetDocuments.metadata.type.IMChat'),
      iconName: 'messageTextCircle',
      subFieldsMap: {
        'chat_platform': { label: t(`${fieldPrefix}.IMChat.chatPlatform`) },
        'chat_participants/group_name': {
          label: t(`${fieldPrefix}.IMChat.chatPartiesGroupName`),
        },
        'start_date': { label: t(`${fieldPrefix}.IMChat.startDate`) },
        'end_date': { label: t(`${fieldPrefix}.IMChat.endDate`) },
        'participants': { label: t(`${fieldPrefix}.IMChat.participants`) },
        // NOTE(review): the next two keys are camelCase while every sibling
        // key is snake_case — confirm they match the stored metadata keys.
        'topicsKeywords': {
          label: t(`${fieldPrefix}.IMChat.topicsKeywords`),
          inputType: 'textarea',
        },
        'fileType': { label: t(`${fieldPrefix}.IMChat.fileType`) },
      },
    },
    wikipedia_entry: {
      text: t('datasetDocuments.metadata.type.wikipediaEntry'),
      allowEdit: false,
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.wikipediaEntry.title`) },
        'language': {
          label: t(`${fieldPrefix}.wikipediaEntry.language`),
          inputType: 'select',
        },
        'web_page_url': { label: t(`${fieldPrefix}.wikipediaEntry.webpageURL`) },
        'editor/contributor': {
          label: t(`${fieldPrefix}.wikipediaEntry.editorContributor`),
        },
        'last_edit_date': {
          label: t(`${fieldPrefix}.wikipediaEntry.lastEditDate`),
        },
        'summary/introduction': {
          label: t(`${fieldPrefix}.wikipediaEntry.summaryIntroduction`),
          inputType: 'textarea',
        },
      },
    },
    synced_from_notion: {
      text: t('datasetDocuments.metadata.type.notion'),
      allowEdit: false,
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.notion.title`) },
        'language': { label: t(`${fieldPrefix}.notion.lang`), inputType: 'select' },
        'author/creator': { label: t(`${fieldPrefix}.notion.author`) },
        'creation_date': { label: t(`${fieldPrefix}.notion.createdTime`) },
        'last_modified_date': {
          label: t(`${fieldPrefix}.notion.lastModifiedTime`),
        },
        'notion_page_link': { label: t(`${fieldPrefix}.notion.url`) },
        'category/tags': { label: t(`${fieldPrefix}.notion.tag`) },
        'description': { label: t(`${fieldPrefix}.notion.desc`) },
      },
    },
    synced_from_github: {
      text: t('datasetDocuments.metadata.type.github'),
      allowEdit: false,
      subFieldsMap: {
        'repository_name': { label: t(`${fieldPrefix}.github.repoName`) },
        'repository_description': { label: t(`${fieldPrefix}.github.repoDesc`) },
        'repository_owner/organization': { label: t(`${fieldPrefix}.github.repoOwner`) },
        'code_filename': { label: t(`${fieldPrefix}.github.fileName`) },
        'code_file_path': { label: t(`${fieldPrefix}.github.filePath`) },
        'programming_language': { label: t(`${fieldPrefix}.github.programmingLang`) },
        'github_link': { label: t(`${fieldPrefix}.github.url`) },
        'open_source_license': { label: t(`${fieldPrefix}.github.license`) },
        'commit_date': { label: t(`${fieldPrefix}.github.lastCommitTime`) },
        'commit_author': {
          label: t(`${fieldPrefix}.github.lastCommitAuthor`),
        },
      },
    },
    originInfo: {
      text: '',
      allowEdit: false,
      subFieldsMap: {
        'name': { label: t(`${fieldPrefix}.originInfo.originalFilename`) },
        'data_source_info.upload_file.size': {
          label: t(`${fieldPrefix}.originInfo.originalFileSize`),
          render: value => formatFileSize(value),
        },
        'created_at': {
          label: t(`${fieldPrefix}.originInfo.uploadDate`),
          render: value => formatTimestamp(value, t('datasetDocuments.metadata.dateTimeFormat') as string),
        },
        'completed_at': {
          label: t(`${fieldPrefix}.originInfo.lastUpdateDate`),
          render: value => formatTimestamp(value, t('datasetDocuments.metadata.dateTimeFormat') as string),
        },
        'data_source_type': {
          label: t(`${fieldPrefix}.originInfo.source`),
          // The raw value selects the translation key for the source label.
          render: value => t(`datasetDocuments.metadata.source.${value}`),
        },
      },
    },
    technicalParameters: {
      text: t('datasetDocuments.metadata.type.technicalParameters'),
      allowEdit: false,
      subFieldsMap: {
        'dataset_process_rule.mode': {
          label: t(`${fieldPrefix}.technicalParameters.segmentSpecification`),
          render: value => value === 'automatic' ? (t('datasetDocuments.embedding.automatic') as string) : (t('datasetDocuments.embedding.custom') as string),
        },
        'dataset_process_rule.rules.segmentation.max_tokens': {
          label: t(`${fieldPrefix}.technicalParameters.segmentLength`),
          render: value => formatNumber(value),
        },
        // NOTE(review): the unit suffixes below ("characters", "paragraphs",
        // "tokens") are hard-coded English and do not go through `t`.
        'average_segment_length': {
          label: t(`${fieldPrefix}.technicalParameters.avgParagraphLength`),
          render: value => `${formatNumber(value)} characters`,
        },
        'segment_count': {
          label: t(`${fieldPrefix}.technicalParameters.paragraphs`),
          render: value => `${formatNumber(value)} paragraphs`,
        },
        'hit_count': {
          label: t(`${fieldPrefix}.technicalParameters.hitCount`),
          render: (value, total) => {
            const hits = value || 0
            // `total` is optional: normalize it so the label never shows
            // "undefined" and the division below is skipped when it is 0.
            const totalCount = total || 0
            const percentage = totalCount ? ((hits / totalCount) * 100).toFixed(2) : 0
            return `${percentage}% (${hits}/${totalCount})`
          },
        },
        'indexing_latency': {
          label: t(`${fieldPrefix}.technicalParameters.embeddingTime`),
          render: value => formatTime(value),
        },
        'tokens': {
          label: t(`${fieldPrefix}.technicalParameters.embeddedSpend`),
          render: value => `${formatNumber(value)} tokens`,
        },
      },
    },
  }
}
|
|
|
const langPrefix = 'datasetDocuments.metadata.languageMap.'

/**
 * Returns the translated language names, keyed by short language code
 * (`zh`, `en`, ...). Each value is looked up under the `languageMap`
 * translation prefix.
 */
export const useLanguages = () => {
  const { t } = useTranslation()
  // Resolve one language code to its translated display name.
  const nameOf = (code: string) => t(`${langPrefix}${code}`)

  return {
    zh: nameOf('zh'),
    en: nameOf('en'),
    es: nameOf('es'),
    fr: nameOf('fr'),
    de: nameOf('de'),
    ja: nameOf('ja'),
    ko: nameOf('ko'),
    ru: nameOf('ru'),
    ar: nameOf('ar'),
    pt: nameOf('pt'),
    it: nameOf('it'),
    nl: nameOf('nl'),
    pl: nameOf('pl'),
    sv: nameOf('sv'),
    tr: nameOf('tr'),
    he: nameOf('he'),
    hi: nameOf('hi'),
    da: nameOf('da'),
    fi: nameOf('fi'),
    no: nameOf('no'),
    hu: nameOf('hu'),
    el: nameOf('el'),
    cs: nameOf('cs'),
    th: nameOf('th'),
    id: nameOf('id'),
    ro: nameOf('ro'),
  }
}
|
|
|
const bookCategoryPrefix = 'datasetDocuments.metadata.categoryMap.book.'

/**
 * Returns the translated book category labels, keyed by category id.
 * Each value is looked up under the book `categoryMap` translation prefix.
 */
export const useBookCategories = () => {
  const { t } = useTranslation()
  // Resolve one category id to its translated label.
  const labelOf = (category: string) => t(`${bookCategoryPrefix}${category}`)

  return {
    fiction: labelOf('fiction'),
    biography: labelOf('biography'),
    history: labelOf('history'),
    science: labelOf('science'),
    technology: labelOf('technology'),
    education: labelOf('education'),
    philosophy: labelOf('philosophy'),
    religion: labelOf('religion'),
    socialSciences: labelOf('socialSciences'),
    art: labelOf('art'),
    travel: labelOf('travel'),
    health: labelOf('health'),
    selfHelp: labelOf('selfHelp'),
    businessEconomics: labelOf('businessEconomics'),
    cooking: labelOf('cooking'),
    childrenYoungAdults: labelOf('childrenYoungAdults'),
    comicsGraphicNovels: labelOf('comicsGraphicNovels'),
    poetry: labelOf('poetry'),
    drama: labelOf('drama'),
    other: labelOf('other'),
  }
}
|
|
|
const personalDocCategoryPrefix = 'datasetDocuments.metadata.categoryMap.personalDoc.'

/**
 * Returns the translated personal-document category labels, keyed by
 * category id. Each value is looked up under the personalDoc `categoryMap`
 * translation prefix.
 */
export const usePersonalDocCategories = () => {
  const { t } = useTranslation()
  // Resolve one category id to its translated label.
  const labelOf = (category: string) => t(`${personalDocCategoryPrefix}${category}`)

  return {
    notes: labelOf('notes'),
    blogDraft: labelOf('blogDraft'),
    diary: labelOf('diary'),
    researchReport: labelOf('researchReport'),
    bookExcerpt: labelOf('bookExcerpt'),
    schedule: labelOf('schedule'),
    list: labelOf('list'),
    projectOverview: labelOf('projectOverview'),
    photoCollection: labelOf('photoCollection'),
    creativeWriting: labelOf('creativeWriting'),
    codeSnippet: labelOf('codeSnippet'),
    designDraft: labelOf('designDraft'),
    personalResume: labelOf('personalResume'),
    other: labelOf('other'),
  }
}
|
|
|
const businessDocCategoryPrefix = 'datasetDocuments.metadata.categoryMap.businessDoc.'

/**
 * Returns the translated business-document category labels, keyed by
 * category id. Each value is looked up under the businessDoc `categoryMap`
 * translation prefix.
 */
export const useBusinessDocCategories = () => {
  const { t } = useTranslation()
  // Resolve one category id to its translated label.
  const labelOf = (category: string) => t(`${businessDocCategoryPrefix}${category}`)

  return {
    meetingMinutes: labelOf('meetingMinutes'),
    researchReport: labelOf('researchReport'),
    proposal: labelOf('proposal'),
    employeeHandbook: labelOf('employeeHandbook'),
    trainingMaterials: labelOf('trainingMaterials'),
    requirementsDocument: labelOf('requirementsDocument'),
    designDocument: labelOf('designDocument'),
    productSpecification: labelOf('productSpecification'),
    financialReport: labelOf('financialReport'),
    marketAnalysis: labelOf('marketAnalysis'),
    projectPlan: labelOf('projectPlan'),
    teamStructure: labelOf('teamStructure'),
    policiesProcedures: labelOf('policiesProcedures'),
    contractsAgreements: labelOf('contractsAgreements'),
    emailCorrespondence: labelOf('emailCorrespondence'),
    other: labelOf('other'),
  }
}
|
|