import { DocumentType } from '@/apis/datastore/model'

/** Separator between blocks in the raw payload; `parseCardData` splits on it. */
const LINE_TOKEN = '⧼-split_line-⧽'

/** "Question" label; `parseLine` marks a block as `DocumentType.QA` when its content contains it. */
const QA_TAG = '「问题」'

/**
 * Matches a line that opens with a 「…」 label.
 * Group 1 is the label including its brackets, group 2 is the remainder of the line.
 */
const KEY_TOKEN_REG = /^(「.*?」)(.*)$/

/** Lookup from the 「…」 label found in the payload to the field name written on the parsed record. */
const KEY_MAP = new Map(
  Object.entries({
    '「文件id」': 'file_id', // file id
    '「文件名」': 'file_name', // file name
    '「文件mimetype」': 'mimetype', // file MIME type
    '「下载链接」': 'download_url', // download link
    '「段落id」': 'chunk_id', // paragraph id
    '「命中关键词」': 'keywords', // matched keywords
    '「命中排名」': 'relevance_score', // hit ranking
    '「段落内容」': 'content', // paragraph content
    '「知识库id」': 'datastore_id', // knowledge base id
    '「知识库名」': 'datastore_name', // knowledge base name
  }),
)

/** Fields whose raw value `parseLine` attempts to decode with `JSON.parse`. */
const JSON_KEY: string[] = ['keywords', 'relevance_score']

/** Field that ends label parsing in `parseLine`: every line after it is appended to this field. */
const END_KEY = 'content'

/**
 * Parses one labelled text block into a flat record.
 *
 * Lines are read top to bottom. A line that opens with a known 「…」 label
 * stores the text after the label under the mapped field name; fields listed
 * in `JSON_KEY` are JSON-decoded when possible and otherwise kept as the raw
 * text. Reading labels stops at the content label, or at the first label that
 * is not in `KEY_MAP`; every remaining line is then appended to the content.
 *
 * @param data - Text of a single block, with lines separated by `\n`.
 * @returns The collected fields. When the content is non-empty its
 *   `<br />` / `<br/>` markers are turned into newlines and `file_type` is set
 *   to `DocumentType.QA` if the content contains `QA_TAG`, else to `''`.
 */
function parseLine(data: string) {
  const rows = data.split('\n')
  const info: Record<string, any> = {}
  // Number of rows already looked at; everything past it is content tail.
  let consumed = 0

  for (const raw of rows) {
    consumed += 1
    const text = raw.trim()
    const match = text ? KEY_TOKEN_REG.exec(text) : null
    // Blank lines and lines without a leading 「…」 label are dropped.
    if (!match) continue

    const [, label, value] = match
    const field = KEY_MAP.get(label)

    if (!field) {
      // Unrecognised label: the whole line, label included, starts the content.
      info[END_KEY] = text
      break
    }

    if (field === END_KEY) {
      info[field] = value
      break
    }

    let parsed: unknown = value
    if (JSON_KEY.includes(field)) {
      try {
        parsed = JSON.parse(value)
      } catch {
        // Not valid JSON: keep the raw text.
      }
    }
    info[field] = parsed
  }

  // Lines after the one that started the content are kept untrimmed.
  const tail = rows.slice(consumed)
  if (tail.length > 0) {
    info[END_KEY] = [info[END_KEY], ...tail].join('\n')
  }

  if (info.content) {
    info.content = info.content.replaceAll('<br />', '\n').replaceAll('<br/>', '\n')
    info.file_type = info.content.includes(QA_TAG) ? DocumentType.QA : ''
  }

  return info
}

/**
 * Splits a raw payload into blocks and parses each one into a card record.
 *
 * The payload is cut on `LINE_TOKEN`; each piece is trimmed, blank pieces are
 * skipped, and the remainder is handed to `parseLine`. Records that end up
 * without content are left out of the result.
 *
 * @param data - Raw payload text.
 * @returns The parsed records in payload order; an empty array when `data` is
 *   empty or not a string.
 */
export function parseCardData(data: string) {
  if (typeof data !== 'string' || data === '') return []

  const cards: ReturnType<typeof parseLine>[] = []
  for (const segment of data.split(LINE_TOKEN)) {
    const block = segment.trim()
    if (!block) continue

    const card = parseLine(block)
    if (card.content) cards.push(card)
  }
  return cards
}
