import { DocumentSplitType, DocumentType } from '@/apis/datastore/model.ts'
import { getFileSuffixByMime } from '@/features/datastore/utils'

/**
 * Builds a predicate that reports whether a MIME type resolves to one of the
 * accepted file suffixes.
 *
 * @param accepted - Suffixes for which the predicate returns `true`.
 * @param fallback - Suffix substituted when `getFileSuffixByMime` returns
 *   `null` or `undefined` for the given MIME type.
 * @returns A function taking a MIME type and returning the membership result.
 */
const suffixMatcher =
  (accepted: readonly string[], fallback: string) =>
  (mimetype: string): boolean =>
    accepted.includes(getFileSuffixByMime(mimetype) ?? fallback)

// Suffix predicates, one per split option. All but the mail predicate fall
// back to 'txt' when the MIME type cannot be resolved to a suffix.
const isGeneralFile = suffixMatcher(['txt', 'md', 'png', 'jpg', 'jpeg'], 'txt')
const isWordLikeFile = suffixMatcher(['doc', 'docx', 'pdf'], 'txt')
const isSlideFile = suffixMatcher(['ppt', 'pptx', 'pdf'], 'txt')
const isSpreadsheetFile = suffixMatcher(['xls', 'xlsx'], 'txt')
// NOTE(review): the fallback here is 'msg', so a MIME type that cannot be
// resolved matches this predicate as well as `isGeneralFile` — confirm that
// is intended.
const isMailFile = suffixMatcher(['msg'], 'msg')

/**
 * Selectable document split strategies.
 *
 * Each entry carries:
 * - `filter`: decides from the document type and MIME type whether the entry
 *   applies to a document;
 * - `label`, `content`, `tips`, `splitDescription`: display strings;
 * - `value`: the `DocumentSplitType` member the entry stands for.
 *
 * The filters are not mutually exclusive: 'pdf' is accepted by both the Word
 * entry and the PPT entry.
 */
export const SplitSubTypes = [
  {
    // Any TEXT document matches without a suffix lookup; otherwise match by suffix.
    filter: (fileType: DocumentType, mimetype: string) =>
      fileType === DocumentType.TEXT || isGeneralFile(mimetype),
    label: '通用类分段',
    value: DocumentSplitType.Text,
    content: 'TXT、图片',
    tips: [
      '· 使用\\n\\n（两次换行）分段',
      '· 段落超过500字时，按句子结束的标点符号进行分段',
    ],
    splitDescription: '两次换行',
  },
  {
    // Any WEBPAGE document matches without a suffix lookup; otherwise match by suffix.
    // NOTE(review): `content` mentions Markdown, but the 'md' suffix is accepted
    // by the general entry's filter rather than this one — confirm which is intended.
    filter: (fileType: DocumentType, mimetype: string) =>
      fileType === DocumentType.WEBPAGE || isWordLikeFile(mimetype),
    label: 'Word类分段',
    value: DocumentSplitType.DOCUMENT,
    content: 'Word、Word导成的PDF、Markdown文档',
    tips: [
      '· 一个标题+段落分为一段',
      '· 段落超过500字时，按句子结束的标点符号进行分段',
    ],
    splitDescription: '标题段落',
  },
  {
    // Matches purely by suffix; the document type is ignored.
    filter: (_: DocumentType, mimetype: string) => isSlideFile(mimetype),
    label: 'PPT类分段',
    value: DocumentSplitType.PAGE,
    content: 'PPT、PPT导成的PDF',
    tips: ['· 一页分为一段', '· 段落超过500字时，按句子结束的标点符号进行分段'],
    splitDescription: '一页一段',
  },
  {
    // Matches purely by suffix; the document type is ignored.
    filter: (_: DocumentType, mimetype: string) => isSpreadsheetFile(mimetype),
    label: 'Excel分段',
    value: DocumentSplitType.JSON,
    content: 'Excel',
    tips: ['· 一行分为一段', '· 每行内容+对应表头的字数超过500字，会失败'],
    splitDescription: '一行一段',
  },
  {
    // Matches purely by suffix; the document type is ignored.
    filter: (_: DocumentType, mimetype: string) => isMailFile(mimetype),
    label: '邮件类分段',
    value: DocumentSplitType.NO_SPLIT,
    content: 'msg邮件文档',
    tips: ['将全部内容分为一个段落'],
    splitDescription: '全分为一段',
  },
]
