"""Minimal FastAPI service exposing a single Tesseract-backed OCR endpoint."""
import io
import os

import pytesseract
from dotenv import load_dotenv
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.concurrency import run_in_threadpool
from PIL import Image

# Load environment variables (e.g. from a local .env file).
load_dotenv()

# Path to the Tesseract executable, read from the environment so the same
# code works across platforms. When unset, pytesseract's default is used.
TESSERACT_CMD = os.getenv("TESSERACT_CMD")
if TESSERACT_CMD:
    pytesseract.pytesseract.tesseract_cmd = TESSERACT_CMD

app = FastAPI(title="Simple OCR API")


def _parse_confidence(raw):
    """Return *raw* as an int confidence, or -1 when it cannot be parsed.

    The confidence column may arrive as an int, a float, or a numeric string
    such as ``"96.06"``; a bare ``int("96.06")`` would raise ``ValueError``,
    so the value is routed through ``float`` first (fraction is truncated).
    """
    try:
        return int(float(raw))
    except (TypeError, ValueError, OverflowError):
        return -1


def _extract_words(data):
    """Build the list of recognised items from an ``image_to_data`` dict.

    Only entries with non-blank text and a confidence greater than 0 are
    kept. Each item carries its text, integer confidence and bounding box.
    """
    columns = zip(
        data.get("text", []),
        data.get("conf", []),
        data.get("left", []),
        data.get("top", []),
        data.get("width", []),
        data.get("height", []),
    )
    words = []
    for raw_text, raw_conf, left, top, width, height in columns:
        text = raw_text.strip()
        confidence = _parse_confidence(raw_conf)
        # Skip empty cells and entries without a positive confidence.
        if not text or confidence <= 0:
            continue
        words.append({
            "text": text,
            "confidence": confidence,
            "bbox": {
                "left": left,
                "top": top,
                "width": width,
                "height": height,
            },
        })
    return words


def _recognise(image_bytes):
    """Decode *image_bytes* and run Tesseract on it (blocking).

    Raises:
        HTTPException: 400 when the bytes cannot be decoded as an image.
    """
    try:
        with Image.open(io.BytesIO(image_bytes)) as image:
            # Image.open is lazy; force decoding here so that corrupt data is
            # reported as a client error rather than failing inside the OCR call.
            image.load()
            return pytesseract.image_to_data(
                image,
                lang="chi_sim+eng",
                config="--psm 6",
                output_type=pytesseract.Output.DICT,
            )
    except (OSError, Image.DecompressionBombError) as exc:
        raise HTTPException(
            status_code=400,
            detail="Uploaded file is not a valid image",
        ) from exc


@app.post("/ocr")
async def ocr(file: UploadFile = File(...)):
    """Run OCR on an uploaded image.

    Args:
        file: The uploaded image file.

    Returns:
        A dict with ``"text"`` (all kept items joined by single spaces) and
        ``"details"`` (one entry per kept item with text, confidence, bbox).

    Raises:
        HTTPException: 400 when the upload cannot be decoded as an image.
    """
    # 1. Read the uploaded image.
    image_bytes = await file.read()

    # 2. OCR with detailed data and coordinates. The call blocks while
    #    Tesseract runs, so it is moved off the event loop.
    data = await run_in_threadpool(_recognise, image_bytes)

    # 3. Extract text and coordinate information.
    results = _extract_words(data)

    # 4. Return the full result.
    return {
        "text": " ".join(r["text"] for r in results),
        "details": results,
    }