# xocr-api/main.py

import io
import os

import pytesseract
from dotenv import load_dotenv
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.concurrency import run_in_threadpool
from PIL import Image

# Load environment variables (python-dotenv's load_dotenv) before any of them
# are read below.
load_dotenv()

# Set the Tesseract OCR executable path. It is read from the TESSERACT_CMD
# environment variable so the same code can run across platforms; when the
# variable is unset or empty, pytesseract's own setting is left untouched.
TESSERACT_CMD = os.getenv("TESSERACT_CMD")
if TESSERACT_CMD:
    pytesseract.pytesseract.tesseract_cmd = TESSERACT_CMD

# Application object; the route handlers in this file register on it.
app = FastAPI(title="Simple OCR API")

def _collect_words(data):
    """Convert pytesseract's column-oriented output into word records.

    Args:
        data: Mapping returned by ``pytesseract.image_to_data`` with
            ``output_type=Output.DICT``. The ``text``, ``conf``, ``left``,
            ``top``, ``width`` and ``height`` columns are walked in lockstep;
            a missing column is treated as empty.

    Returns:
        A list with one dict per kept entry, each holding ``text``,
        ``confidence`` and ``bbox`` (``left``/``top``/``width``/``height``).
    """
    column_names = ("text", "conf", "left", "top", "width", "height")
    columns = [data.get(name, []) for name in column_names]

    words = []
    for text, conf, left, top, width, height in zip(*columns):
        text = text.strip()
        # Go through float() first: the confidence may arrive as a fractional
        # string (e.g. "96.06"), which a bare int() would reject.
        conf = int(float(conf))

        # Keep only entries that carry text and a confidence greater than 0.
        if text and conf > 0:
            words.append({
                "text": text,
                "confidence": conf,
                "bbox": {
                    "left": left,
                    "top": top,
                    "width": width,
                    "height": height,
                },
            })
    return words


def _recognize(image_bytes):
    """Decode the uploaded bytes and run OCR on them (blocking).

    Args:
        image_bytes: Raw content of the uploaded file.

    Returns:
        The list of word records built by ``_collect_words``.

    Raises:
        HTTPException: With status 400 when the bytes cannot be decoded as an
            image.
    """
    # Only the decode step is guarded, so that OCR-side failures (for example
    # a missing tesseract binary) are not misreported as a bad upload.
    try:
        image = Image.open(io.BytesIO(image_bytes))
        # Image.open is lazy; load() forces decoding so that a truncated or
        # corrupt file is detected here rather than inside the OCR call.
        image.load()
    except (OSError, Image.DecompressionBombError) as exc:
        raise HTTPException(
            status_code=400,
            detail="Uploaded file is not a readable image",
        ) from exc

    # The context manager releases the image once OCR is done.
    with image:
        # OCR with per-entry details and coordinates.
        data = pytesseract.image_to_data(
            image,
            lang="chi_sim+eng",
            config="--psm 6",
            output_type=pytesseract.Output.DICT,
        )
    return _collect_words(data)


@app.post("/ocr")
async def ocr(file: UploadFile = File(...)):
    """Run OCR on an uploaded image and return the text with coordinates.

    Args:
        file: The uploaded image file.

    Returns:
        A dict with ``text`` (all recognized entries joined by single spaces)
        and ``details`` (one record per entry with ``text``, ``confidence``
        and ``bbox``).

    Raises:
        HTTPException: With status 400 when the upload is not a readable image.
    """
    # 1. Read the uploaded file.
    image_bytes = await file.read()

    # 2-3. Decoding and OCR are blocking, CPU-bound work; run them in the
    # thread pool so the event loop stays free to serve other requests.
    results = await run_in_threadpool(_recognize, image_bytes)

    # 4. Return the combined text together with the per-entry details.
    return {
        "text": " ".join(r["text"] for r in results),
        "details": results,
    }