57 lines
1.5 KiB
Python
57 lines
1.5 KiB
Python
import io
import os

import pytesseract
from dotenv import load_dotenv
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.concurrency import run_in_threadpool
from PIL import Image
|
|
|
|
# Load environment variables.
load_dotenv()

# Location of the Tesseract OCR executable. It is read from the environment
# (rather than hard-coded) so the same code works across platforms; when the
# variable is unset or empty, pytesseract keeps its own default command.
TESSERACT_CMD = os.environ.get("TESSERACT_CMD")
if TESSERACT_CMD:
    pytesseract.pytesseract.tesseract_cmd = TESSERACT_CMD
|
|
|
|
# FastAPI application object; the /ocr route is registered on it via @app.post.
app = FastAPI(title="Simple OCR API")
|
|
|
|
def _extract_words(data):
    """Turn pytesseract's column-oriented result into per-item records.

    Args:
        data: The dict returned by ``pytesseract.image_to_data`` with
            ``output_type=pytesseract.Output.DICT``; the ``text``, ``conf``,
            ``left``, ``top``, ``width`` and ``height`` columns are read.

    Returns:
        A list of dicts with ``text``, ``confidence`` and ``bbox`` keys, one
        for every entry that has non-blank text and a confidence above 0.
    """
    # NOTE(review): pytesseract appears to return a dict without any columns
    # when Tesseract emits no data rows; treat that as "nothing recognized"
    # instead of failing with KeyError. Confirm against the installed version.
    if not data.get("text"):
        return []

    columns = zip(
        data["text"],
        data["conf"],
        data["left"],
        data["top"],
        data["width"],
        data["height"],
    )

    words = []
    for raw_text, raw_conf, left, top, width, height in columns:
        text = raw_text.strip()
        # Confidence may arrive as an int, a float, or a float-formatted
        # string such as "96.06"; int() alone raises ValueError on the latter.
        confidence = int(float(raw_conf))

        # Keep only entries that carry text and have a confidence above 0.
        if not text or confidence <= 0:
            continue

        words.append({
            "text": text,
            "confidence": confidence,
            "bbox": {
                "left": left,
                "top": top,
                "width": width,
                "height": height,
            },
        })
    return words


@app.post("/ocr")
async def ocr(file: UploadFile = File(...)):
    """Run OCR on an uploaded image and return the text with coordinates.

    Args:
        file: The uploaded image file.

    Returns:
        A dict with ``text`` (all recognized items joined by single spaces)
        and ``details`` (one record per item with its confidence and
        bounding box).

    Raises:
        HTTPException: 400 when the upload is empty or cannot be decoded as
            an image.
    """
    # 1. Read the uploaded image.
    image_bytes = await file.read()

    # Only the decode step is guarded: pytesseract's "tesseract not found"
    # error is also an OSError subclass and must not be reported as a bad
    # upload, so the OCR call stays outside this try block.
    try:
        image = Image.open(io.BytesIO(image_bytes))
        # Image.open is lazy; force decoding now so truncated or corrupt
        # files are rejected here rather than in the middle of OCR.
        image.load()
    except OSError as exc:
        raise HTTPException(
            status_code=400,
            detail="Uploaded file is not a readable image",
        ) from exc

    # 2. OCR (detailed data including coordinates). The call blocks while
    # Tesseract runs, so execute it in a worker thread to keep the event loop
    # free to serve other requests.
    data = await run_in_threadpool(
        pytesseract.image_to_data,
        image,
        lang="chi_sim+eng",
        config="--psm 6",
        output_type=pytesseract.Output.DICT,
    )

    # 3. Extract text and coordinate information.
    results = _extract_words(data)

    # 4. Return the complete result.
    return {
        "text": " ".join(item["text"] for item in results),
        "details": results,
    }
|
|
|