/* eslint-disable complexity */
import { TranslateServiceType, translateService } from '@/services/translateService'
import { debounce } from 'lodash'
import { makeObservable, observable } from 'mobx'
import { api } from '../api'
import { commonUtils } from '../commonUtils'
import { storageHelper } from '../storageHelper'
import { PdfTranslateHelperConsts as Consts } from './consts'
import { PDFDocumentProxy, PDFPageProxy, TextContent, TextItem } from './pdf'
import { PdfTranslateHelperTypes as Types } from './types'

type OriginalParagraph = Types.OriginalParagraph

/** Parameters accepted by `PdfTranslateHelper.doTranslate` (and forwarded to its parsing / translating helpers). */
interface TranslateParams {
  fileId: number
  doc: PDFDocumentProxy
  currentPage: number
  scaleFactor: number
  from: string
  to: string
  /** Only parse and render the original text; do not translate. */
  onlyParseOriginal: boolean
  shareHashId?: string
}

/** Task type: share | download */
type TranslateTaskType = 'share' | 'download'
/**
 * Task status, in order: translating | translation paused | finished | translation failed.
 * After a page refresh, a task that was translating automatically switches to paused.
 */
type TranslateTaskStatus = 'processing' | 'pending' | 'finished' | 'error'

/** A translation task for one file and one from/to locale pair. */
export interface TranslateTask {
  fileId: number
  fileUrl: string
  type: TranslateTaskType
  status: TranslateTaskStatus
  from: TranslateServiceType.LocaleWithAutoDetect
  to: TranslateServiceType.Locale
  /** Page at which the translation starts */
  startPage: number
  /** Target page number of the current translation task */
  targetPage: number
  /** Number of pages whose translation has finished */
  finishedPage: number
}

/** Progress / completion callback invoked by translation tasks. */
export type TranslateTaskCallback = (data: {
  err: string | null
  status: TranslateTaskStatus
  /** Target page number of the current translation task */
  targetPage: number
  /** Number of pages whose translation has finished */
  finishedPage: number
  translateData: Array<Types.PageData>
}) => void

/**
 * Half-width of the page window around `currentPage`: `tryParsePdf` and
 * `translateDebounceCall` process page indices in
 * `[currentPage - PARSE_PAGE_COUNT, currentPage + PARSE_PAGE_COUNT)`.
 */
const PARSE_PAGE_COUNT = 3

class PdfTranslateHelper {
  /**
   * The task currently exposed to the UI together with the hook that must be
   * called once its PDF document is loaded; null when there is no active task.
   * `createTranslateTask` assigns it, and `onPdfDocLoaded` starts the actual translation.
   */
  @observable curActiveTaskInfo: {
    task: TranslateTask
    onPdfDocLoaded: (doc: PDFDocumentProxy) => void
  } | null = null

  /** In-memory tasks created through `createTranslateTask` (at most one per file, older ones are filtered out). */
  private taskList: Array<TranslateTask> = []

  /**
   * Per-file parse cache, keyed by file id.
   * `originalParagraphs` and `parsedPageData` are indexed by zero-based page index
   * (see `parsePdfPage`, which stores into `[pageNum]` and loads page `pageNum + 1`).
   */
  private fileParseDataMap: {
    [key: string]: {
      taskUid: string
      totalPage: number
      originalParagraphs: Array<Array<OriginalParagraph>>
      parsedPageData: Array<Types.PageData>
    }
  } = {}

  /**
   * Debounced (1000 ms) worker scheduled by `doTranslate`: translates the
   * already-parsed pages around `params.currentPage` and, when no error is
   * detected, records the translation progress remotely and in storage.
   *
   * NOTE(review): the wrapped function is async but `debounce` does not await it,
   * so a rejection from `translatePage` or the status lookup is presumably left
   * unhandled — confirm.
   */
  private translateDebounceCall = debounce(
    async (params: TranslateParams, callback: (data: Array<Types.PageData>) => void) => {
      const { fileId, currentPage, from, to, shareHashId } = params
      // Translate the current page plus the three pages before/after it:
      // indices [currentPage - PARSE_PAGE_COUNT, currentPage + PARSE_PAGE_COUNT), clamped to the parsed range.
      let startIndex = currentPage - PARSE_PAGE_COUNT
      if (startIndex < 0) {
        startIndex = 0
      }
      const { taskUid, parsedPageData, totalPage } = this.fileParseDataMap[params.fileId]
      let endIndex = currentPage + PARSE_PAGE_COUNT
      if (endIndex > parsedPageData.length) {
        endIndex = parsedPageData.length
      }
      const pageData = parsedPageData.slice(startIndex, endIndex)
      await this.translatePage(pageData, from, to, taskUid, fileId, callback, shareHashId)
      // Record the translation status
      let errMsg: string | null = null
      pageData.some(({ paragraphs }) => {
        if (paragraphs.length === 0) {
          return false
        }
        // Intent: if any paragraph failed, the whole page is treated as failed.
        // Only the first paragraph of each non-empty page is inspected here.
        errMsg = paragraphs[0].errMsg
        return errMsg
      })
      if (!errMsg) {
        const { finishedPage } = await this.getTranslateTaskStatus({
          fileId,
          from,
          to,
        })
        const startPage = startIndex + 1
        const endPage = endIndex
        // The first page of this run must be at most the page right after the last finished one (finishedPage + 1),
        // and the last page of this run must be beyond the last finished page.
        // console.log('record translate status', { fileId, from, to, startPage, endPage, finishedPage })
        if (startPage <= finishedPage + 1 && finishedPage < endPage) {
          this.recordTranslateStatusDebounceCall(
            {
              fileId,
              fromLang: from,
              toLang: to,
              pageNumber: endPage,
              totalPageNumber: totalPage,
            },
            () => {
              // Persist to storage (only when a stored task for this file/from/to already exists)
              let { pdfTranslateTaskList } = storageHelper.get(['pdfTranslateTaskList'])
              if (!pdfTranslateTaskList) {
                pdfTranslateTaskList = []
              }
              const storageTask = pdfTranslateTaskList.find(
                (task) => task.fileId === fileId && task.from === from && task.to === to
              )
              if (storageTask) {
                storageTask.finishedPage = endPage
                this.updateTaskToStorage(storageTask)
              }
            }
          )
        }
      }
    },
    1000
  )

  /**
   * Debounced (1000 ms) reporter: sends the progress payload through
   * `api.pdf.recordTranslateStatus`, then invokes the completion callback.
   */
  private recordTranslateStatusDebounceCall = debounce(
    async (
      statusPayload: {
        fileId: number
        fromLang: TranslateServiceType.LocaleWithAutoDetect
        toLang: TranslateServiceType.Locale
        /** Number of pages already translated */
        pageNumber: number
        /** Total number of pages in the file */
        totalPageNumber: number
      },
      onRecorded: () => void
    ) => {
      await api.pdf.recordTranslateStatus(statusPayload)
      onRecorded()
    },
    1000
  )

  /** Register this instance's `@observable` members with MobX. */
  public init(): void {
    makeObservable(this)
  }

  /**
   * Parse the pages around `params.currentPage`, hand the parsed pages to
   * `callback`, and (unless `params.onlyParseOriginal` is set) schedule the
   * debounced translation, which reports through the same `callback`.
   */
  public async doTranslate(
    params: TranslateParams,
    callback: (data: Array<Types.PageData>) => void
  ) {
    const parsedPages = await this.tryParsePdf(params)
    const translationRequested = !params.onlyParseOriginal
    if (translationRequested) {
      // Translation runs asynchronously; it is only scheduled here.
      this.translateDebounceCall(params, callback)
    }
    callback(parsedPages)
  }

  /**
   * Replace the cached parse data of `fileId` with a fresh, empty entry
   * (new task uid, zero pages, no parsed content).
   */
  public resetStatus(fileId: number): void {
    this.fileParseDataMap[fileId] = {
      taskUid: commonUtils.genId(),
      totalPage: 0,
      originalParagraphs: [],
      parsedPageData: [],
    }
  }

  /**
   * Look up the translation progress of a file.
   *
   * @param fileId file id
   * @param from source locale of the translation
   * @param to target locale of the translation
   * @returns
   *   finishedPage: number of pages already translated for the given from/to pair
   *   lastTask: the task known for the file (in memory first, then storage); null when there is none
   */
  public async getTranslateTaskStatus({
    fileId,
    from,
    to,
  }: {
    fileId: number
    from: TranslateServiceType.LocaleWithAutoDetect
    to: TranslateServiceType.Locale
  }): Promise<{
    finishedPage: number
    lastTask: TranslateTask | null
  }> {
    console.log('getTranslateTaskStatus', { fileId, from, to })
    const storedTaskList = storageHelper.get(['pdfTranslateTaskList']).pdfTranslateTaskList || []

    // Sources are consulted in order: in-memory tasks, persisted tasks, then the server.
    const resolveFinishedPage = async (): Promise<number> => {
      const memoryTask = this.taskList.find(
        (task) => task.fileId === fileId && task.from === from && task.to === to
      )
      if (memoryTask) {
        return memoryTask.finishedPage
      }
      const persistedTask = storedTaskList.find(
        (task) => task.fileId === fileId && task.from === from && task.to === to
      )
      if (persistedTask) {
        return persistedTask.finishedPage
      }
      const response = await api.pdf.getTranslateStatus(fileId)
      const remoteEntry = response.data.fields.find(
        (f: any) => f.toLang === to && f.fromLang === from
      )
      return remoteEntry ? remoteEntry.translatePageNumber : 0
    }

    const finishedPage = await resolveFinishedPage()
    // The task lookup only matches on file id, regardless of the locale pair.
    const lastTask: TranslateTask | null =
      this.taskList.find((task) => task.fileId === fileId) ||
      storedTaskList.find((task) => task.fileId === fileId) ||
      null
    return { finishedPage, lastTask }
  }

  /**
   * Create a translation task for a file.
   *
   * Flow, as implemented below:
   * 1. Reject (status 'error') when the file already has a task in 'processing' state.
   * 2. Short-circuit (status 'finished', empty data) when not forced and the
   *    finished page already reaches `targetPage`.
   * 3. Otherwise drop existing tasks of the file, fetch the file URL, build the
   *    task, publish it via `curActiveTaskInfo`, and persist it as 'pending'.
   * The translation itself starts when `curActiveTaskInfo.onPdfDocLoaded` is called.
   *
   * @param callback receives progress immediately for non-finished states, and
   *   the 'finished' state only after the status has been recorded.
   */
  public async createTranslateTask(
    {
      fileId,
      from,
      to,
      targetPage,
      type,
      totalPage,
      forceTranslate,
    }: {
      fileId: number
      type: TranslateTaskType
      from: TranslateServiceType.LocaleWithAutoDetect
      to: TranslateServiceType.Locale
      targetPage: number
      totalPage: number
      forceTranslate?: boolean
    },
    callback: TranslateTaskCallback
  ) {
    console.log('createTranslateTask', fileId, from, to, targetPage, totalPage)
    const { finishedPage, lastTask } = await this.getTranslateTaskStatus({ fileId, from, to })
    if (lastTask && lastTask.status === 'processing') {
      callback({
        err: '当前文件正在翻译中，请稍后再试',
        status: 'error',
        targetPage,
        finishedPage,
        translateData: [],
      })
      return
    }
    if (!forceTranslate && finishedPage >= targetPage) {
      callback({
        err: null,
        status: 'finished',
        targetPage,
        finishedPage,
        translateData: [],
      })
      return
    }
    // Remove any existing task for the same file
    this.taskList = this.taskList.filter((task) => task.fileId !== fileId)
    this.curActiveTaskInfo = null
    const { downloadUrl } = await api.pdf.getFileUrl(fileId)
    const task: TranslateTask = {
      fileId,
      fileUrl: downloadUrl,
      startPage: forceTranslate ? 1 : finishedPage + 1,
      targetPage,
      finishedPage,
      from,
      to,
      status: 'processing',
      type,
    }
    // Callback invoked after each translation batch
    const translatedCallback: TranslateTaskCallback = async ({
      err,
      status,
      targetPage,
      finishedPage,
      translateData,
    }) => {
      // Record the translation status
      this.recordTranslateStatusDebounceCall(
        {
          fileId,
          fromLang: from,
          toLang: to,
          pageNumber: finishedPage,
          totalPageNumber: totalPage,
        },
        () => {
          // Update the in-memory task state
          this.taskList.some((task) => {
            if (task.fileId === fileId) {
              task.finishedPage = finishedPage
              task.status = status
              return true
            }
            return false
          })
          // console.log('record translate status success', { task, })
          // Persist to storage
          const storageTask: TranslateTask = {
            ...task,
            // The status kept in storage is either 'finished' or 'pending'
            status: status === 'finished' ? 'finished' : 'pending',
          }
          this.updateTaskToStorage(storageTask)
          if (status === 'finished') {
            // Report the data back to the caller
            callback({
              err,
              status,
              targetPage,
              finishedPage,
              translateData,
            })
          }
        }
      )
      if (status !== 'finished') {
        // Report the data back to the caller
        callback({
          err,
          status,
          targetPage,
          finishedPage,
          translateData,
        })
      }
    }
    // Callback invoked once the PDF document has been loaded
    const pdfDocLoadedCallback: (doc: PDFDocumentProxy) => void = (doc) => {
      pdfTranslateHelper.translateTargetPage(
        {
          doc: doc as unknown as PDFDocumentProxy,
          fileId,
          from,
          to,
          startPage: task.startPage,
          targetPage,
        },
        translatedCallback
      )
    }
    // Publish to the runtime state. This triggers rendering of the PdfTranslateTask component.
    // The PdfTranslateTask component renders the PDF and hands the doc object
    // back through the onPdfDocLoaded event.
    this.curActiveTaskInfo = {
      task,
      onPdfDocLoaded: pdfDocLoadedCallback,
    }
    // Add to the in-memory context
    this.taskList.push(task)
    // Persist to storage
    const storageTask: TranslateTask = {
      ...task,
      // The status stored initially is 'pending'
      status: 'pending',
    }
    this.updateTaskToStorage(storageTask)
  }

  /**
   * Translate pages `startPage..targetPage` (1-based, inclusive) of `doc` in
   * batches of five pages, invoking `callback` after every batch.
   *
   * - `status` is 'processing' while batches remain, 'finished' after the last
   *   batch, and 'error' as soon as a batch reports a failure (no further
   *   batches are started after an error).
   * - `finishedPage` counts fully translated pages: on success it includes the
   *   batch that just completed; on error it only includes the batches before
   *   the failing one, so a failed page is never reported as finished.
   * - The returned promise settles after the last batch, and rejections from
   *   `translatePage` propagate to it instead of being left on a floating promise.
   *
   * @param callback progress callback; its return value is not awaited
   */
  public async translateTargetPage(
    {
      fileId,
      doc,
      from,
      to,
      startPage,
      targetPage,
    }: {
      fileId: number
      doc: PDFDocumentProxy
      from: string
      to: string
      startPage: number
      targetPage: number
    },
    callback: TranslateTaskCallback
  ) {
    const BATCH_SIZE = 5
    const promiseArray: Array<Promise<Types.PageData>> = []
    for (let i = 0; i < doc.numPages; i++) {
      // Page indices are zero-based, page numbers are one-based.
      if (i >= startPage - 1 && i < targetPage) {
        promiseArray.push(this.parsePdfPage(doc, i, fileId))
      }
    }
    const pageDataList = await Promise.all(promiseArray)
    pageDataList.sort((a, b) => a.pageNum - b.pageNum)
    const totalCount = pageDataList.length
    const taskUid = commonUtils.genId()

    let startIdx = 0
    let status: TranslateTaskStatus = 'processing'
    // Keep translating batch by batch until everything is done or a batch fails.
    while (status === 'processing') {
      const endIdx = Math.min(startIdx + BATCH_SIZE, totalCount)
      const dataList = pageDataList.slice(startIdx, endIdx)
      await this.translatePage(dataList, from, to, taskUid, fileId, () => {})
      // A page counts as failed when its first paragraph carries an error message.
      const failedPage = dataList.find(
        ({ paragraphs }) => paragraphs.length > 0 && !!paragraphs[0].errMsg
      )
      const errMsg: string | null = failedPage ? failedPage.paragraphs[0].errMsg : null
      let finishedPage: number
      if (errMsg) {
        status = 'error'
        // Only the pages of the batches before this one are known to be complete.
        finishedPage = startPage - 1 + startIdx
      } else {
        status = endIdx >= totalCount ? 'finished' : 'processing'
        finishedPage = startPage - 1 + endIdx
      }
      callback({
        err: errMsg,
        status,
        finishedPage,
        targetPage,
        translateData: pageDataList,
      })
      startIdx = endIdx
    }
  }

  /**
   * Persist `task` in the stored task list, replacing any stored task that
   * has the same file id (the new entry is appended at the end).
   */
  private updateTaskToStorage(task: TranslateTask) {
    const storedTaskList = storageHelper.get(['pdfTranslateTaskList']).pdfTranslateTaskList || []
    const pdfTranslateTaskList = storedTaskList.filter((stored) => stored.fileId !== task.fileId)
    pdfTranslateTaskList.push(task)
    storageHelper.set({ pdfTranslateTaskList })
  }

  /**
   * Parse the pages in the window around `currentPage` (the current page and
   * the three pages before/after it) and return them sorted by page index.
   */
  private async tryParsePdf({
    doc,
    currentPage,
    fileId,
  }: TranslateParams): Promise<Array<Types.PageData>> {
    const windowStart = currentPage - PARSE_PAGE_COUNT
    const windowEnd = currentPage + PARSE_PAGE_COUNT
    const pendingPages = Array.from({ length: doc.numPages }, (_, pageIndex) => pageIndex)
      .filter((pageIndex) => pageIndex >= windowStart && pageIndex < windowEnd)
      .map((pageIndex) => this.parsePdfPage(doc, pageIndex, fileId))
    const pageDataList = await Promise.all(pendingPages)
    pageDataList.sort((left, right) => left.pageNum - right.pageNum)
    return pageDataList
  }

  /**
   * Parse one page into `Types.PageData`, caching the result per file.
   *
   * @param doc loaded PDF document
   * @param pageNum zero-based page index (the pdf.js page `pageNum + 1` is loaded)
   * @param fileId key of the per-file cache
   * @returns the cached page data when present, otherwise the freshly parsed page
   */
  private async parsePdfPage(
    doc: PDFDocumentProxy,
    pageNum: number,
    fileId: number
  ): Promise<Types.PageData> {
    if (!this.fileParseDataMap[fileId]) {
      this.fileParseDataMap[fileId] = {
        taskUid: commonUtils.genId(),
        totalPage: doc.numPages,
        originalParagraphs: [],
        parsedPageData: [],
      }
    }
    // Hold on to this entry object: later writes go to it even if the map slot is replaced meanwhile.
    const fileCache = this.fileParseDataMap[fileId]
    fileCache.totalPage = doc.numPages

    const cachedPage = fileCache.parsedPageData[pageNum]
    if (cachedPage) {
      return cachedPage
    }

    const pdfPage = await doc.getPage(pageNum + 1)
    const viewport = pdfPage.getViewport({
      scale: 1,
    })
    const pageWidth = viewport.width
    const pageHeight = viewport.height
    const textContent = await pdfPage.getTextContent()
    // Paragraphs of the previous page (undefined when that page has not been parsed).
    const prevPageParagraphs = fileCache.originalParagraphs[pageNum - 1]
    const curOriginalParagraphs = await this.getOriginalParagraphs({
      pageWidth,
      pageHeight,
      prevPageParagraphs,
      textContent,
    })
    fileCache.originalParagraphs[pageNum] = curOriginalParagraphs

    let textContentStr = ''
    const paragraphs = this.generateParagraphs(curOriginalParagraphs)
    try {
      textContentStr = textContent.items.map((item) => (item as TextItem).str).join('')
    } catch (error) {
      // Best effort: fall back to an empty page text.
    }

    const pageData: Types.PageData = {
      paragraphs,
      pageNum,
      width: pageWidth,
      height: pageHeight,
      textContext: textContentStr,
    }
    fileCache.parsedPageData[pageNum] = pageData
    return pageData
  }

  /**
   * Translate the not-yet-translated paragraphs of every page in `pageDataList`.
   *
   * One `translateService.translate` request is issued per page (all requests run
   * concurrently). Results are written back onto the matching paragraphs, and
   * `callback` is invoked with `pageDataList` each time the service delivers results.
   *
   * A successful result also clears the paragraph's `errMsg`, so a paragraph
   * that failed earlier and succeeds on retry no longer marks its page as failed.
   *
   * @param pageDataList pages whose paragraphs are updated in place
   * @param from source locale
   * @param to target locale
   * @param taskUid task identifier forwarded to the translate service
   * @param fileId file identifier forwarded to the translate service
   * @param callback invoked with `pageDataList` after results have been applied
   * @param shareHashId optional share identifier forwarded to the translate service
   */
  private async translatePage(
    pageDataList: Array<Types.PageData>,
    from: string,
    to: string,
    taskUid: string,
    fileId: number,
    callback: (data: Array<Types.PageData>) => void,
    shareHashId?: string
  ) {
    const promiseArray = pageDataList.map(async (pageData) => {
      const needTranslateParagraphs = this.getNeedTranslateParagraphs([pageData])
      const paragraphs = needTranslateParagraphs.map(({ id, sourceText }) => {
        return {
          text: sourceText,
          from,
          to,
          id,
          htmlStr: '',
        }
      })
      const context = this.getPdfContext(pageData)
      await translateService.translate(
        {
          paragraphs,
          taskUid,
          originUrl: window.location.href,
          originTitle: document.title,
          triggerType: 'pdf',
          shareHashId,
          fileId,
          context,
        },
        (results) => {
          results.forEach(({ result, id, error }) => {
            // The paragraph is looked up by id across all pages of this call.
            pageDataList.forEach(({ paragraphs }) => {
              const p = paragraphs.find(({ id: pId }) => pId === id)
              if (p) {
                if (!error) {
                  p.translateResult = result
                  // Drop any error left over from a previous failed attempt.
                  p.errMsg = null
                } else {
                  p.errMsg = error || '翻译失败，请稍后重试或联系客服'
                }
              }
            })
          })
          callback(pageDataList)
        }
      )
    })
    await Promise.all(promiseArray)
  }

  /**
   * Build the translatable paragraphs of one page from the pdf.js text content.
   *
   * Steps: merge text items into lines, try to connect a paragraph split across
   * the previous/current page, merge lines into paragraphs, drop paragraphs that
   * need no translation, then flag unmerged paragraphs that share their vertical
   * span with no other paragraph (`isBlockLine`).
   */
  private async getOriginalParagraphs({
    prevPageParagraphs,
    pageWidth,
    pageHeight,
    textContent,
  }: {
    textContent: TextContent
    pageWidth: number
    pageHeight: number
    prevPageParagraphs: Array<OriginalParagraph>
  }): Promise<Array<OriginalParagraph>> {
    // Merge text items into lines
    const rowLineParagraphs = this.getRowLineParagraphs({
      pageHeight,
      data: textContent,
    })

    // Connect a paragraph that was split across the previous and the current page
    const { prevP: prevPageTail, curP: continuation } = this.findPageContactParagraph({
      rowLineParagraphs,
      pageWidth,
      prevPageParagraphs,
    })
    if (continuation) {
      this.contactSplitParagraph(prevPageTail!, undefined, continuation)
    }

    // Merge lines into paragraphs, then keep only those that need translating
    const translatable = this.mergeParagraphs(rowLineParagraphs).filter(
      (paragraph) => !this.isSkipTranslate(paragraph.str)
    )

    // Decide whether each unmerged paragraph has its line to itself
    translatable.forEach((paragraph) => {
      if (paragraph.mergedTimes !== 0) {
        return
      }
      const { top, bottom } = paragraph
      const sharesLine = translatable.some((other) => {
        const startsInside = other.top >= top && other.top <= bottom
        const endsInside = other.bottom >= top && other.bottom <= bottom
        const covers = other.top <= top && other.bottom >= bottom
        return other !== paragraph && (startsInside || endsInside || covers)
      })
      paragraph.isBlockLine = !sharesLine
    })
    return translatable
  }

  /**
   * Wrap each original paragraph in a `Types.Paragraph` with an empty
   * translation result and no error.
   */
  private generateParagraphs(originalParagraphs: Array<OriginalParagraph>): Array<Types.Paragraph> {
    return originalParagraphs.map(
      (original): Types.Paragraph => ({
        id: original.id,
        sourceText: original.str,
        translateResult: '',
        originalParagraph: original,
        errMsg: null,
      })
    )
  }

  /** Collect, in page order, every paragraph that has no translation result yet. */
  private getNeedTranslateParagraphs(pageDataList: Array<Types.PageData>) {
    const pending: Array<Types.Paragraph> = []
    for (const { paragraphs } of pageDataList) {
      for (const paragraph of paragraphs) {
        if (!paragraph.translateResult) {
          pending.push(paragraph)
        }
      }
    }
    return pending
  }

  /**
   * True when `str` needs no translation: at most one character long, or made
   * up solely of digits, whitespace and the punctuation listed in the pattern.
   */
  private isSkipTranslate(str: string) {
    if (str.length <= 1) {
      return true
    }
    return !str || /^[\d\\.:%\\(\\),%\s\\-]+$/.test(str)
  }

  /** True when `str` is empty or consists only of six or more underscores. */
  private skipRender(str: string) {
    if (!str) {
      return true
    }
    return /^_{6,}$/.test(str)
  }

  // Convert the pdf.js text content into line-level paragraphs.
  // The raw result is post-processed, e.g. fragments that belong to the same
  // paragraph on the same line are merged together.
  // `pageHeight` is used to turn the pdf.js y translation into a top offset.
  private getRowLineParagraphs({ data, pageHeight }: { data: TextContent; pageHeight: number }) {
    // console.log('textData', data, pageHeight)
    const styles = data.styles
    const textContentItems = data.items as Array<TextItem>

    // Append `lineP` to `paragraphs`, or fold it into the last entry when it continues that entry.
    const mergeRowLineP = (paragraphs: Array<OriginalParagraph>, lineP: OriginalParagraph) => {
      const { str, right, fontFamily, width, nextLeft, nextTop, fontSize } = lineP
      // Ignore whitespace-only fragments
      if (!str.trim()) {
        return
      }
      // Ignore fragments with a zero font size
      if (fontSize === 0) {
        return
      }
      if (paragraphs.length === 0) {
        paragraphs.push(lineP)
      } else {
        const prevLineP = paragraphs[paragraphs.length - 1]
        // Check whether both fragments are text of the same paragraph on the same line
        if (this.isSameLineText(prevLineP, lineP)) {
          // A run of text that is really one piece but was split in two by the parser. For example the original is:
          // "你好我的名字叫做xxx", but the parser may produce: part one "你好我的名字", part two "叫做xxx"
          if (this.isSameLineBreakText(prevLineP, lineP)) {
            prevLineP.str += str
          } else {
            prevLineP.str += ` ${str}`
          }
          // console.log('isSameLineText', prevLineP, curLineP)
          prevLineP.fontSize = Math.max(prevLineP.fontSize, fontSize)
          prevLineP.right = right
          prevLineP.fontFamily = fontFamily
          prevLineP.width = Math.max(prevLineP.width + width, right - prevLineP.left)
          prevLineP.nextLeft = nextLeft
          prevLineP.nextTop = Math.max(prevLineP.nextTop, nextTop)
          return
        }
        // The fragment is a sub-symbol suffix, e.g. "测试*"
        if (this.isSubSymbolSuffix(prevLineP, lineP)) {
          // console.log('isSubSymbol', prevLineP, curLineP)
          prevLineP.fontSize = Math.max(prevLineP.fontSize, fontSize)
          prevLineP.right = right
          prevLineP.width = Math.max(prevLineP.width, width)
          prevLineP.nextLeft = Math.max(prevLineP.nextLeft, nextLeft)
          prevLineP.nextTop = Math.max(prevLineP.nextTop, nextTop)
          // The suffix is kept as an attachment; its text is not appended to `str`.
          prevLineP.attachList.push(lineP)
          return
        }
        paragraphs.push(lineP)
      }
    }

    const preMergedParagraphs: Array<OriginalParagraph> = []
    textContentItems.forEach((item) => {
      const { fontName, str, transform, width } = item
      const { fontFamily } = styles[fontName]
      // transform entries 0/3 are used as x/y scale, 4/5 as x/y translation
      const scaleX = transform[0]
      const scaleY = transform[3]
      const translateX = transform[4]
      const translateY = transform[5]
      const fontSize = scaleY
      const left = translateX
      const right = left + width
      // Convert to a distance from the top of the page
      const top = pageHeight - translateY - fontSize
      const bottom = top + fontSize
      // Expected position of a following line / following fragment
      const nextTop = bottom + fontSize * 0.2
      const nextLeft = right + scaleX * 0.3
      const curLineP: OriginalParagraph = {
        id: commonUtils.genId(),
        str,
        left,
        nextLeft,
        top,
        nextTop,
        width,
        right,
        bottom,
        fontFamily,
        fontSize,
        attachList: [],
        mergedTimes: 0,
        textIndent: false,
        isBlockLine: false,
      }
      mergeRowLineP(preMergedParagraphs, curLineP)
    })
    const mergedParagraphs: Array<OriginalParagraph> = []
    // For some PDFs the parsed data that should run left to right comes out right to left
    // (elements that belong at the front of the array appear at the back).
    // So the result is reversed and merged a second time here.
    // Note: `reverse()` reorders `preMergedParagraphs` in place.
    preMergedParagraphs.reverse().forEach((item) => {
      mergeRowLineP(mergedParagraphs, item)
    })
    // Reverse again to restore the original ordering
    const origLineParagraphs = mergedParagraphs.reverse()
    // console.log('origLineParagraphs', JSON.parse(JSON.stringify(origLineParagraphs)))
    return origLineParagraphs
  }

  // Merge vertically adjacent lines into paragraphs using a series of layout checks.
  // Each line is compared with the most recently collected paragraph; the checks run
  // in this order: justified, left-aligned, first-line indent, reference, centered.
  private mergeParagraphs(origLineParagraphs: Array<OriginalParagraph>) {
    const mergedParagraphs: Array<OriginalParagraph> = []
    origLineParagraphs.forEach((line, position) => {
      const nothingToCompare = position === 0 || mergedParagraphs.length === 0
      if (nothingToCompare) {
        this.pushMergedParagraphs(line, mergedParagraphs)
        return
      }
      const previous = mergedParagraphs[mergedParagraphs.length - 1]

      if (this.isJustifyAlignText(previous, line) || this.isAlignLeftText(previous, line)) {
        // Justified or left-aligned continuation
        this.mergeParagraph(previous, line, mergedParagraphs)
      } else if (this.isIndentText(previous, line)) {
        // The first line of the paragraph is indented
        previous.textIndent = true
        this.mergeParagraph(previous, line, mergedParagraphs)
      } else if (this.isRefererText(previous, line) || this.isJustifyCenterText(previous, line)) {
        // Reference entry or centered continuation
        this.mergeParagraph(previous, line, mergedParagraphs)
      } else {
        // No rule matched: the line starts a new paragraph
        this.pushMergedParagraphs(line, mergedParagraphs)
      }
    })
    return mergedParagraphs
  }

  /** Append `curP` to `mergedParagraphs` unless its text is one that `skipRender` rejects. */
  private pushMergedParagraphs(
    curP: OriginalParagraph,
    mergedParagraphs: Array<OriginalParagraph>
  ) {
    const renderable = !this.skipRender(curP.str)
    if (renderable) {
      mergedParagraphs.push(curP)
    }
  }

  /**
   * Fold line `curP` into paragraph `prevP` (mutating `prevP`).
   *
   * - When `checkDivide` is set and `curP` is a divider (one repeated character),
   *   it is pushed as its own paragraph instead of being merged.
   * - Lines rejected by `skipRender` are dropped.
   * - A trailing hyphen on `prevP` is removed and no joiner is inserted when
   *   `curP` sits at the expected next-line position.
   *
   * @param prefixChar joiner inserted between the two texts (default: one space)
   * @param checkDivide whether divider lines are kept separate (default: true)
   */
  private mergeParagraph(
    prevP: OriginalParagraph,
    curP: OriginalParagraph,
    mergedParagraphs: Array<OriginalParagraph>,
    prefixChar = ' ',
    checkDivide = true
  ) {
    const isDivider = checkDivide && curP.str && this.isDivide(curP.str)
    if (isDivider) {
      this.pushMergedParagraphs(curP, mergedParagraphs)
      return
    }
    if (this.skipRender(curP.str)) {
      return
    }

    const isHyphenBreak = this.equal(prevP.nextTop, curP.top) && prevP.str.endsWith('-')
    const joiner = isHyphenBreak ? '' : prefixChar
    if (isHyphenBreak) {
      prevP.str = prevP.str.slice(0, prevP.str.length - 1)
    }

    // Very short fragments: widen the paragraph box to cover both pieces.
    const hasShortSide = prevP.str.length < 5 || curP.str.length < 5
    if (hasShortSide) {
      prevP.fontSize = Math.max(prevP.fontSize, curP.fontSize)
      prevP.width = Math.max(prevP.width + curP.width, curP.right - prevP.left)
    }

    prevP.mergedTimes += 1
    prevP.str = prevP.str + joiner + curP.str
    prevP.bottom = curP.bottom
    prevP.nextTop = curP.nextTop
    if (curP.left < prevP.left) {
      prevP.left = curP.left
    }
  }

  /**
   * Try to connect a paragraph that was split between `prevP` (tail of the
   * previous page) and `curP` (a line of the current page). Mutates both.
   *
   * Nothing happens when the fonts differ or `curP` is more than one unit wider
   * than `prevP`, or when `prevP` already ends with a separator (`,` `.` `，` `。`).
   * Otherwise the text after the last separator is removed from `prevP`; when no
   * such tail exists and `prevLineP` carries the same text as `prevP`, the whole
   * text of `prevP` is moved to the front of `curP`.
   *
   * When the moved text ends with a hyphen, exactly that trailing hyphen is
   * removed (hyphens inside the text are kept), matching `mergeParagraph`.
   *
   * NOTE(review): when the separator regex matches, the captured tail (and the
   * separator itself) is removed from `prevP.str` but is not prepended to
   * `curP.str` — confirm whether that text is meant to be dropped.
   *
   * @param prevP last paragraph of the previous page
   * @param prevLineP optional line compared against `prevP`; the text is only moved when it is given
   * @param curP line of the current page that may continue `prevP`
   */
  private contactSplitParagraph(
    prevP: OriginalParagraph,
    prevLineP: OriginalParagraph | undefined,
    curP: OriginalParagraph
  ) {
    if (!this.equalFont(prevP, curP) || prevP.width + 1 < curP.width) return
    const separators = [',', '\\.', '\uFF0C', '\u3002']
    const regexBase = separators.join('|')
    // Same separators without the regex escape, for use inside a character class.
    const pureRegexBase = separators.join('').replace('\\', '')
    if (!new RegExp(regexBase).test(prevP.str[prevP.str.length - 1])) {
      const regex = new RegExp(`[${pureRegexBase}]([^${pureRegexBase}]+?$)`)
      let prefixStartStr = ''
      prevP.str = prevP.str.replace(regex, (_, str) => ((prefixStartStr = str), ''))
      if (prevLineP && prevLineP.str === prevP.str && !prefixStartStr) {
        prefixStartStr = prevP.str
        prevP.str = ''
        if (prefixStartStr.endsWith('-')) {
          // Drop only the trailing hyphen of the broken word.
          prefixStartStr = prefixStartStr.slice(0, -1)
        } else {
          prefixStartStr += ' '
        }
        curP.str = prefixStartStr.trimStart() + curP.str
      }
    }
  }

  // Find the pair of paragraphs that belong together but were split across two pages.
  // Returns `{}` when there is nothing to compare; otherwise `prevP` is the last
  // paragraph of the previous page and `curP` the first current-page line whose left
  // offset matches the previous paragraph's right padding and whose width matches its
  // width (`curP` is undefined when no line matches).
  private findPageContactParagraph({
    pageWidth,
    rowLineParagraphs,
    prevPageParagraphs,
  }: {
    pageWidth: number
    prevPageParagraphs: Array<OriginalParagraph>
    rowLineParagraphs: Array<OriginalParagraph>
  }) {
    const nothingToCompare =
      !prevPageParagraphs || prevPageParagraphs.length <= 0 || rowLineParagraphs.length <= 0
    if (nothingToCompare) {
      return {}
    }
    const lastOfPrevPage = prevPageParagraphs[prevPageParagraphs.length - 1]
    if (!lastOfPrevPage) {
      return {}
    }
    const rightPadding = pageWidth - lastOfPrevPage.right
    const continuation = rowLineParagraphs.find((line) => {
      return this.equal(line.left, rightPadding) && this.equal(line.width, lastOfPrevPage.width)
    })
    return {
      prevP: lastOfPrevPage,
      curP: continuation,
    }
  }

  // Single-line merge: text that belongs to one paragraph on one line but was split in two.
  // Example: "你好，我的名字叫做xxx" may be parsed as part one "你好，我的名字" and part two "叫做xxx"
  private isSameLineText(prevLineP: OriginalParagraph, curP: OriginalParagraph) {
    if (!this.isSameLine(prevLineP, curP)) {
      return false
    }
    // The fragment must start where the previous one expects its successor (within one font size)
    if (!this.equal(prevLineP.nextLeft, curP.left, curP.fontSize)) {
      return false
    }
    return this.equal(prevLineP.fontSize, curP.fontSize)
  }

  // Single-line merge: one word that was split into two fragments.
  // Example: "Test" parsed as "T" and "est"
  private isSameLineBreakText(prevLineP: OriginalParagraph, curLineP: OriginalParagraph) {
    // Overlapping fragments get a tolerance of one font size, otherwise 1.
    const startsBeforePrevEnds = curLineP.left < prevLineP.right
    const tolerance = startsBeforePrevEnds ? prevLineP.fontSize : 1
    return this.equal(prevLineP.right, curLineP.left, tolerance)
  }

  // Single-line merge: detect a sub-symbol (smaller glyph attached to the previous fragment)
  private isSubSymbolSuffix(prevLineP: OriginalParagraph, curLineP: OriginalParagraph) {
    // The candidate must be at most 80% of the previous font size
    if (!(curLineP.fontSize <= prevLineP.fontSize * 0.8)) {
      return false
    }
    if (!this.equal(prevLineP.nextLeft, curLineP.left, prevLineP.fontSize)) {
      return false
    }
    // Aligned with either the top or the bottom edge of the previous fragment
    const alignedAtTop = this.equal(curLineP.top, prevLineP.top, 3)
    const alignedAtBottom = this.equal(curLineP.bottom, prevLineP.bottom, 3)
    return alignedAtTop || alignedAtBottom
  }

  // Two-line merge: justified text (both edges line up)
  private isJustifyAlignText(prevP: OriginalParagraph, curP: OriginalParagraph) {
    if (!this.isWrapLineText(prevP, curP)) {
      return false
    }
    // A list-style prefix on the previous line allows a larger left offset
    const leftTolerance = this.startWith(prevP.str, Consts.LIST_STYLE_PREFIXES) ? 4 : 1.5
    if (!this.equal(prevP.left, curP.left, leftTolerance)) {
      return false
    }
    return this.equal(prevP.right, curP.right, 1)
  }

  // Two-line merge: left-aligned text
  private isAlignLeftText(prevP: OriginalParagraph, curP: OriginalParagraph) {
    if (!this.isWrapLineText(prevP, curP)) {
      return false
    }
    if (!this.equal(prevP.left, curP.left, 1.5)) {
      return false
    }
    // Either the second line is narrower, or the right edges are within four font sizes
    return prevP.width > curP.width || this.equal(prevP.right, curP.right, curP.fontSize * 4)
  }

  // Two-line merge: the first line is indented to the right of the second line
  private isIndentText(prevP: OriginalParagraph, curP: OriginalParagraph) {
    const widthGap = Math.abs(prevP.width - curP.width)
    const widerWidth = Math.max(prevP.width, curP.width)
    if (!this.isWrapLineText(prevP, curP)) {
      return false
    }
    // Widths differ by less than 10%
    if (!(widthGap / widerWidth < 0.1)) {
      return false
    }
    if (!(curP.left < prevP.left)) {
      return false
    }
    // The indent is at least half a font size ...
    if (!(prevP.left - curP.left >= prevP.fontSize * 0.5)) {
      return false
    }
    // ... and at most four font sizes
    return this.equal(curP.left, prevP.left, curP.fontSize * 4)
  }

  // Two-line merge: the second line is indented. Usually seen when references are rendered
  private isRefererText(prevP: OriginalParagraph, curP: OriginalParagraph) {
    if (!this.startWith(prevP.str, ['['])) {
      return false
    }
    if (this.isJustifyCenterText(prevP, curP)) {
      return false
    }
    if (!this.isWrapLineText(prevP, curP)) {
      return false
    }
    if (!(curP.width < prevP.width)) {
      return false
    }
    if (!(prevP.left < curP.left)) {
      return false
    }
    return this.equal(curP.left, prevP.left, curP.fontSize * 4)
  }

  // Two-line merge: check whether the two lines are center-aligned
  private isJustifyCenterText(prevP: OriginalParagraph, curP: OriginalParagraph) {
    const leftOffset = Math.abs(prevP.left - curP.left)
    const rightOffset = Math.abs(prevP.right - curP.right)
    if (!(prevP.width > curP.width)) {
      return false
    }
    if (!(1 < leftOffset)) {
      return false
    }
    if (!this.isWrapLineText(prevP, curP)) {
      return false
    }
    // Both edges are inset by roughly the same amount
    return this.equal(leftOffset, rightOffset, 1.5)
  }

  // Two-line merge: basic conditions for the second line being a wrapped line of the same paragraph
  private isWrapLineText(prevP: OriginalParagraph, curP: OriginalParagraph) {
    // Same font (the font family is only compared when the previous text is shorter than 30 characters)
    const compareFamily = prevP.str.length < 30
    if (!this.equalFont(prevP, curP, compareFamily)) {
      return false
    }
    // The top of the next line lies within line-height distance of the previous line
    if (!this.equal(prevP.nextTop, curP.top, prevP.fontSize * 0.65)) {
      return false
    }
    // A line starting with a list-style prefix begins a new item
    return !this.startWith(curP.str, Consts.LIST_STYLE_PREFIXES)
  }

  // Check whether the font size (and, when `checkFont` is set, the font family) matches
  private equalFont(
    prevP: OriginalParagraph,
    { fontSize, fontFamily: fontName }: OriginalParagraph,
    checkFont = false
  ) {
    const sameSize = this.equal(prevP.fontSize, fontSize, 1)
    if (!checkFont) {
      return sameSize
    }
    return sameSize && prevP.fontFamily === fontName
  }

  // Positional check only: are both fragments on the same line (not whether they share a paragraph)?
  // Example: "你好        大馒头" counts as the same line
  private isSameLine(prevLineP: OriginalParagraph, { bottom }: OriginalParagraph) {
    const sameBaseline = this.equal(prevLineP.bottom, bottom, 1)
    return sameBaseline
  }

  /** True when `a` and `b` differ by at most `cap` (default 5). */
  private equal(a: number, b: number, cap = 5) {
    const distance = Math.abs(a - b)
    return distance <= cap
  }

  /** True when `str` is empty or consists of a single character repeated. */
  private isDivide(str: string) {
    if (!str) {
      return true
    }
    const repeatedFirstChar = str[0].repeat(str.length)
    return str === repeatedFirstChar
  }

  /** True when `str` starts with at least one of the prefixes in `prefixList`. */
  private startWith(str: string, prefixList: string[]) {
    for (const prefix of prefixList) {
      if (str.startsWith(prefix)) {
        return true
      }
    }
    return false
  }

  /**
   * Draws every image painted on a PDF page into its own detached canvas.
   *
   * Walks the page's operator list and, for each `paintImageXObject`
   * operation, looks the image object up in `page.objs`, creates a canvas of
   * the image's size and paints the pixels with `putBinaryImageData`.
   *
   * Approach for locating images taken from:
   * https://github.com/TomasHubelbauer/globus/blob/main/index.mjs#L63
   *
   * @param page The page whose images should be extracted.
   * @returns The canvases, in the order their paint operations appear in the
   *   operator list. Previously the list was built but never returned.
   */
  private async getPdfImgCanvas(page: PDFPageProxy): Promise<Array<HTMLCanvasElement>> {
    // Numeric value of pdfjs.OPS.paintImageXObject.
    const PAINT_IMAGE_X_OBJECT = 85

    const canvasList: Array<HTMLCanvasElement> = []
    const { argsArray, fnArray } = await page.getOperatorList()

    for (let i = 0; i < fnArray.length; i++) {
      if (fnArray[i] !== PAINT_IMAGE_X_OBJECT) {
        continue
      }

      // The first argument of the operation is the id of the image object.
      const img = page.objs.get(argsArray[i][0])
      const canvas = document.createElement('canvas')
      canvas.width = img.width
      canvas.height = img.height

      // getContext may return null; such a canvas cannot be painted, so skip it.
      const ctx = canvas.getContext('2d')
      if (!ctx) {
        continue
      }

      this.putBinaryImageData(ctx, img)
      canvasList.push(canvas)
    }

    return canvasList
  }

  /**
   * Paints a decoded image object onto a 2D canvas context, starting at (0, 0).
   *
   * A real `ImageData` instance is handed straight to `ctx.putImageData`.
   * Anything else is read through its `width`, `height`, `kind` and `data`
   * fields and converted to RGBA in horizontal strips of up to
   * `FULL_CHUNK_HEIGHT` (16) rows, reusing one strip-sized `ImageData` buffer.
   *
   * NOTE(review): this looks like a port of pdf.js' `putBinaryImageData`
   * (see the commented-out `_util.IsLittleEndianCached` line below) — confirm
   * against upstream before changing the pixel loops.
   *
   * @param ctx Target 2D rendering context.
   * @param imgData Either an `ImageData`, or an object exposing `width`,
   *   `height`, `kind` (1, 2 or 3, see `ImageKind`) and `data`.
   * @param transferMaps Optional array of per-channel lookup tables. Length 1:
   *   the same table is used for red, green, blue and gray. Length 4: tables
   *   for red, green, blue and gray in that order. Other lengths are ignored.
   * @throws Error `bad image kind: …` when `imgData.kind` is none of the
   *   three supported values.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private putBinaryImageData(ctx: CanvasRenderingContext2D, imgData: any, transferMaps = null) {
    // Number of image rows converted and painted per putImageData call.
    const FULL_CHUNK_HEIGHT = 16

    // Values of `imgData.kind` handled below.
    const ImageKind = {
      GRAYSCALE_1BPP: 1,
      RGB_24BPP: 2,
      RGBA_32BPP: 3,
    }

    // Fast path: already a browser ImageData, paint it as-is.
    if (typeof ImageData !== 'undefined' && imgData instanceof ImageData) {
      ctx.putImageData(imgData, 0, 0)
      return
    }

    const height = imgData.height,
      width = imgData.width
    // Rows left over after splitting the image into full 16-row strips.
    const partialChunkHeight = height % FULL_CHUNK_HEIGHT
    const fullChunks = (height - partialChunkHeight) / FULL_CHUNK_HEIGHT
    // One extra (shorter) strip when the height is not a multiple of 16.
    const totalChunks = partialChunkHeight === 0 ? fullChunks : fullChunks + 1
    // Scratch buffer for one strip; refilled and painted once per strip.
    const chunkImgData = ctx.createImageData(width, FULL_CHUNK_HEIGHT)
    // srcPos: read cursor into `src`; destPos: write cursor into the strip buffer.
    let srcPos = 0,
      destPos
    const src = imgData.data
    const dest = chunkImgData.data
    let i, j, thisChunkHeight, elemsInThisChunk
    let transferMapRed, transferMapGreen, transferMapBlue, transferMapGray

    // Pick the per-channel lookup tables out of `transferMaps`, if provided.
    if (transferMaps) {
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      switch ((transferMaps as any).length) {
        case 1:
          // A single table applies to every channel.
          transferMapRed = transferMaps[0]
          transferMapGreen = transferMaps[0]
          transferMapBlue = transferMaps[0]
          transferMapGray = transferMaps[0]
          break

        case 4:
          // One table per channel: red, green, blue, gray.
          transferMapRed = transferMaps[0]
          transferMapGreen = transferMaps[1]
          transferMapBlue = transferMaps[2]
          transferMapGray = transferMaps[3]
          break
      }
    }

    if (imgData.kind === ImageKind.GRAYSCALE_1BPP) {
      // Source is bit-packed: each source bit becomes one 32-bit pixel word.
      const srcLength = src.byteLength
      // View the strip buffer as one 32-bit word per RGBA pixel.
      const dest32 = new Uint32Array(dest.buffer, 0, dest.byteLength >> 2)
      const dest32DataLength = dest32.length
      // Source bytes per row: `width` bits rounded up to a whole byte.
      const fullSrcDiff = (width + 7) >> 3
      let white = 0xffffffff
      // NOTE(review): the endianness check from the line below was replaced by
      // a hard-coded constant, so this assumes a little-endian host — confirm.
      // let black = _util.IsLittleEndianCached.value ? 0xff000000 : 0x000000ff
      let black = 0xff000000

      if (transferMapGray) {
        // A gray table mapping 0 -> 0xff and 0xff -> 0 swaps the two pixel words.
        if (transferMapGray[0] === 0xff && transferMapGray[0xff] === 0) {
          ;[white, black] = [black, white]
        }
      }

      for (i = 0; i < totalChunks; i++) {
        thisChunkHeight = i < fullChunks ? FULL_CHUNK_HEIGHT : partialChunkHeight
        destPos = 0

        for (j = 0; j < thisChunkHeight; j++) {
          // Bytes still unread in the source.
          const srcDiff = srcLength - srcPos
          let k = 0
          // Pixels to emit for this row: the full width while more than one
          // row of bytes remains, otherwise derived from the remaining bytes.
          const kEnd = srcDiff > fullSrcDiff ? width : srcDiff * 8 - 7
          // kEnd rounded down to a multiple of 8 (whole source bytes).
          const kEndUnrolled = kEnd & ~7
          let mask = 0
          let srcByte = 0

          // Unrolled: one source byte -> eight pixels, most significant bit first.
          for (; k < kEndUnrolled; k += 8) {
            srcByte = src[srcPos++]
            dest32[destPos++] = srcByte & 128 ? white : black
            dest32[destPos++] = srcByte & 64 ? white : black
            dest32[destPos++] = srcByte & 32 ? white : black
            dest32[destPos++] = srcByte & 16 ? white : black
            dest32[destPos++] = srcByte & 8 ? white : black
            dest32[destPos++] = srcByte & 4 ? white : black
            dest32[destPos++] = srcByte & 2 ? white : black
            dest32[destPos++] = srcByte & 1 ? white : black
          }

          // Remaining pixels of the row: walk a single-bit mask through the
          // next byte, fetching a new byte whenever the mask is exhausted.
          for (; k < kEnd; k++) {
            if (mask === 0) {
              srcByte = src[srcPos++]
              mask = 128
            }

            dest32[destPos++] = srcByte & mask ? white : black
            mask >>= 1
          }
        }

        // Zero the unused tail of the strip buffer (all four channel bytes 0).
        while (destPos < dest32DataLength) {
          dest32[destPos++] = 0
        }

        ctx.putImageData(chunkImgData, 0, i * FULL_CHUNK_HEIGHT)
      }
    } else if (imgData.kind === ImageKind.RGBA_32BPP) {
      // Source already has 4 bytes per pixel, so strips are copied verbatim.
      const hasTransferMaps = !!(transferMapRed || transferMapGreen || transferMapBlue)
      // Here `j` is the y offset at which the next strip is painted.
      j = 0
      elemsInThisChunk = width * FULL_CHUNK_HEIGHT * 4

      for (i = 0; i < fullChunks; i++) {
        dest.set(src.subarray(srcPos, srcPos + elemsInThisChunk))
        srcPos += elemsInThisChunk

        // Remap R, G and B through their tables; the alpha byte is not touched.
        if (hasTransferMaps) {
          for (let k = 0; k < elemsInThisChunk; k += 4) {
            if (transferMapRed) {
              dest[k + 0] = transferMapRed[dest[k + 0]]
            }

            if (transferMapGreen) {
              dest[k + 1] = transferMapGreen[dest[k + 1]]
            }

            if (transferMapBlue) {
              dest[k + 2] = transferMapBlue[dest[k + 2]]
            }
          }
        }

        ctx.putImageData(chunkImgData, 0, j)
        j += FULL_CHUNK_HEIGHT
      }

      // Trailing strip with fewer than FULL_CHUNK_HEIGHT rows, if any.
      if (i < totalChunks) {
        elemsInThisChunk = width * partialChunkHeight * 4
        dest.set(src.subarray(srcPos, srcPos + elemsInThisChunk))

        if (hasTransferMaps) {
          for (let k = 0; k < elemsInThisChunk; k += 4) {
            if (transferMapRed) {
              dest[k + 0] = transferMapRed[dest[k + 0]]
            }

            if (transferMapGreen) {
              dest[k + 1] = transferMapGreen[dest[k + 1]]
            }

            if (transferMapBlue) {
              dest[k + 2] = transferMapBlue[dest[k + 2]]
            }
          }
        }

        ctx.putImageData(chunkImgData, 0, j)
      }
    } else if (imgData.kind === ImageKind.RGB_24BPP) {
      // Source has 3 bytes per pixel; each pixel is expanded to RGBA below.
      const hasTransferMaps = !!(transferMapRed || transferMapGreen || transferMapBlue)
      thisChunkHeight = FULL_CHUNK_HEIGHT
      // Here `elemsInThisChunk` counts pixels (not bytes) in the strip.
      elemsInThisChunk = width * thisChunkHeight

      for (i = 0; i < totalChunks; i++) {
        // The last strip may be shorter than FULL_CHUNK_HEIGHT.
        if (i >= fullChunks) {
          thisChunkHeight = partialChunkHeight
          elemsInThisChunk = width * thisChunkHeight
        }

        destPos = 0

        // Copy R, G, B and append a fully opaque alpha byte for every pixel.
        for (j = elemsInThisChunk; j--; ) {
          dest[destPos++] = src[srcPos++]
          dest[destPos++] = src[srcPos++]
          dest[destPos++] = src[srcPos++]
          dest[destPos++] = 255
        }

        // Remap R, G and B through their tables; the alpha byte is not touched.
        if (hasTransferMaps) {
          for (let k = 0; k < destPos; k += 4) {
            if (transferMapRed) {
              dest[k + 0] = transferMapRed[dest[k + 0]]
            }

            if (transferMapGreen) {
              dest[k + 1] = transferMapGreen[dest[k + 1]]
            }

            if (transferMapBlue) {
              dest[k + 2] = transferMapBlue[dest[k + 2]]
            }
          }
        }

        ctx.putImageData(chunkImgData, 0, i * FULL_CHUNK_HEIGHT)
      }
    } else {
      throw new Error(`bad image kind: ${imgData.kind}`)
    }
  }

  /**
   * Returns the page's `textContext`, limited to 50 KiB via `truncateString`.
   *
   * @param pageData The page whose text context is wanted.
   */
  private getPdfContext(pageData: Types.PageData): string {
    const maxContextBytes = 50 * 1024
    return this.truncateString(pageData.textContext, maxContextBytes)
  }

  /**
   * Truncates a string so that its UTF-8 encoding occupies at most
   * `maxLengthInBytes` bytes.
   *
   * The cut is moved back to the nearest character boundary, so a multi-byte
   * character is never split. Cutting in the middle of a sequence would make
   * `TextDecoder` emit U+FFFD for the broken tail, which both corrupts the
   * text and can re-encode to more bytes than the requested limit.
   *
   * @param string The text to truncate.
   * @param maxLengthInBytes Maximum size of the result in UTF-8 bytes. Values
   *   below zero are treated as zero.
   * @returns The original string when it already fits, otherwise the longest
   *   prefix made of whole characters that fits. If encoding or decoding
   *   throws, the original string is returned unchanged (best effort).
   */
  private truncateString(string: string, maxLengthInBytes: number) {
    try {
      // Encode to UTF-8 so the length can be measured in bytes.
      const bytes = new TextEncoder().encode(string)

      if (bytes.length > maxLengthInBytes) {
        let end = Math.max(0, Math.floor(maxLengthInBytes))

        // UTF-8 continuation bytes look like 10xxxxxx. If the first excluded
        // byte is one, the cut sits inside a character: step back to its lead byte.
        while (end > 0 && (bytes[end] & 0xc0) === 0x80) {
          end--
        }

        // Decode the kept prefix back into a string.
        return new TextDecoder().decode(bytes.subarray(0, end))
      }

      // Already within the limit: return the input untouched.
      return string
    } catch (error) {
      return string
    }
  }

  // private getItemColor(item: TextItem, operatorList: PDFOperatorList) {
  //   type PDFStatus = any
  //   // 记录状态的堆栈
  //   const stack: PDFStatus[] = []
  //   // 当前状态记录
  //   let currentStatus: PDFStatus = {}

  //   const OPS = Consts.PDF_OPS

  //   // 按顺序分析页面指令
  //   for (let fnIndex = 0; fnIndex < operatorList.fnArray.length; fnIndex++) {
  //     const fn = operatorList.fnArray[fnIndex]
  //     const args = operatorList.argsArray[fnIndex]
  //     switch (fn) {
  //       case OPS.beginMarkedContentProps:
  //         // console.log('beginMarkedContentProps', args)
  //         break
  //       //保存
  //       case OPS.save:
  //         stack.push(currentStatus)
  //         currentStatus = { ...currentStatus }
  //         break
  //       //还原
  //       case OPS.restore:
  //         currentStatus = stack.pop() ?? {}
  //         break
  //       //设置文本填充颜色
  //       case OPS.setFillRGBColor:
  //         currentStatus.currentColor = [args[0], args[1], args[2]]
  //         break
  //       //设置文本区域
  //       case OPS.setTextMatrix:
  //         currentStatus.currentMatrix = [args[4], args[5]]
  //         currentStatus.currentXY = [args[4], args[5]]
  //         break
  //       //设置行距
  //       case OPS.setLeading:
  //         currentStatus.leading = args[0]
  //         break
  //       case OPS.setFillColor:
  //         console.log('setFillColor', args)
  //         break
  //       //设置字体类型和大小
  //       case OPS.setFont:
  //         currentStatus.font = [args[0], args[1]]
  //         break
  //       //计算换行, 换行时当前坐标需要跳到下一行的开头
  //       case OPS.nextLine:
  //       case OPS.nextLineShowText:
  //       case OPS.nextLineSetSpacingShowText:
  //         if (currentStatus.leading && currentStatus.currentXY) {
  //           currentStatus.currentXY = [currentStatus.currentXY[0], currentStatus.currentXY[1] - currentStatus.leading]
  //         }
  //         break
  //       //移动文本坐标
  //       case OPS.moveText:
  //         if (currentStatus.currentXY) {
  //           currentStatus.currentXY = [currentStatus.currentXY[0] + args[0], currentStatus.currentXY[1] + args[1]]
  //         }
  //         break
  //       //显示文本
  //       case OPS.showText:
  //         if (currentStatus.currentXY) {
  //           let x = currentStatus.currentXY[0]
  //           const y = currentStatus.currentXY[1]
  //           // 判断文本是否匹配定位
  //           const isMatch = () => {
  //             return Math.abs(x - item.transform[4]) < item.height / 5 &&
  //               Math.abs(y - item.transform[5]) < item.height / 5
  //           }
  //           if (isMatch()) {
  //             // return currentStatus.currentColor
  //           }
  //           if (args[0]) {
  //             // 计算打印的每个字的实际坐标, 然后和item的坐标进行配对
  //             for (const charInfo of args[0]) {
  //               if (typeof charInfo?.width === 'number' && currentStatus.font) {
  //                 if (isMatch()) {
  //                   // return currentStatus.currentColor
  //                 }
  //                 x += (charInfo?.width / 1000) * currentStatus.font[1]
  //               } else if (typeof charInfo === 'number' && currentStatus.font) {
  //                 if (isMatch()) {
  //                   // return currentStatus.currentColor
  //                 }
  //                 x -= (charInfo / 1000) * currentStatus.font[1]
  //               }
  //             }
  //           }
  //         }
  //         break
  //     }
  //   }
  //   return this.lastColor
  // }
}

/** Module-level `PdfTranslateHelper` instance, created once when this module is loaded and exported for callers. */
export const pdfTranslateHelper = new PdfTranslateHelper()
