commit 60e39b98ac16a3f7477f6bec9872f5271c9795c1 Author: Mr.Xia <1424473282@qq.com> Date: Sun Feb 1 21:30:08 2026 +0800 Initial commit diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..2cf578e --- /dev/null +++ b/.env.example @@ -0,0 +1,7 @@ +# OCR 环境配置文件模板 +# 复制此文件为 .env 并填写实际的配置 + +# Tesseract OCR 可执行文件路径 +# Windows 开发环境示例: C:\Program Files\Tesseract-OCR\tesseract.exe +# Linux 生产环境示例: /usr/bin/tesseract +TESSERACT_CMD=C:\Program Files\Tesseract-OCR\tesseract.exe diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..04ab9be --- /dev/null +++ b/.gitignore @@ -0,0 +1,27 @@ +# 环境变量配置文件(包含敏感路径信息) +.env + +# Python 缓存 +__pycache__/ +*.py[cod] +*$py.class +*.so + +# 虚拟环境 +venv/ +env/ +ENV/ + +# IDE 配置 +.vscode/ +.idea/ +*.swp +*.swo + +# 测试生成的文件 +test_image*.png +*.log + +# 系统文件 +.DS_Store +Thumbs.db diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..845cee4 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,33 @@ +FROM ubuntu:22.04 + +ENV DEBIAN_FRONTEND=noninteractive +ENV PYTHONUNBUFFERED=1 + +# 1️⃣ 安装系统依赖 + Tesseract + 语言包 +RUN apt-get update && \ + apt-get install -y \ + tesseract-ocr \ + tesseract-ocr-chi-sim \ + tesseract-ocr-eng \ + python3 \ + python3-pip \ + && rm -rf /var/lib/apt/lists/* + +# 2️⃣ 设置工作目录 +WORKDIR /app + +# 3️⃣ 先拷贝依赖清单(利用 Docker 缓存) +COPY requirements.txt . + +# 4️⃣ 安装 Python 依赖(包含 python-dotenv) +RUN pip3 install --no-cache-dir -r requirements.txt + +# 5️⃣ 拷贝业务代码 +COPY . . + +# 6️⃣ 设置生产环境 Tesseract 路径环境变量 +ENV TESSERACT_CMD=/usr/bin/tesseract + +# 7️⃣ 启动 FastAPI(单进程,稳定) +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"] + diff --git a/README.md b/README.md new file mode 100644 index 0000000..2ad373d --- /dev/null +++ b/README.md @@ -0,0 +1,95 @@ +# XOcr Api - 文字识别 API + +基于 FastAPI + Tesseract OCR 的文字识别服务,支持中英文识别和文本坐标返回。 + +## 环境配置 + +### 开发环境 (Windows) + +1. 安装依赖: +```bash +pip install -r requirements.txt +``` + +2. 
配置 `.env` 文件(已创建): +```env +TESSERACT_CMD=C:\Program Files\Tesseract-OCR\tesseract.exe +``` + +3. 启动服务: +```bash +uvicorn main:app --host 0.0.0.0 --port 8080 --reload +``` + +### 生产环境 (Docker) + +```bash +# 构建镜像 +docker-compose build + +# 启动服务 +docker-compose up -d + +# 查看日志 +docker-compose logs -f +``` + +## API 使用 + +### POST /ocr + +上传图片进行文字识别,返回识别文本和坐标信息。 + +**请求:** +```bash +curl -X POST "http://localhost:8080/ocr" \ + -F "file=@test_image.png" +``` + +**响应:** +```json +{ + "text": "完整识别文本", + "details": [ + { + "text": "文本内容", + "confidence": 95, + "bbox": { + "left": 10, + "top": 30, + "width": 100, + "height": 20 + } + } + ] +} +``` + +## 测试 + +运行测试脚本: +```bash +python test_ocr_bbox.py +``` + +会自动创建测试图片,调用 API,并生成带有边界框的可视化结果。 + +## 项目结构 + +``` +XOcr_Api/ +├── main.py # FastAPI 应用 +├── requirements.txt # Python 依赖 +├── Dockerfile # Docker 镜像构建 +├── docker-compose.yml # Docker Compose 配置 +├── .env # 开发环境配置(不提交) +├── .env.example # 环境配置模板 +├── .gitignore # Git 忽略文件 +└── test_ocr_bbox.py # 测试脚本 +``` + +## 配置说明 + +- **TESSERACT_CMD**: Tesseract OCR 可执行文件路径 + - Windows: `C:\Program Files\Tesseract-OCR\tesseract.exe` + - Linux: `/usr/bin/tesseract`(Docker 环境已自动配置) diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..0051f4a --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,11 @@ +version: "3.9" + +services: + ocr: + image: ocr-api + container_name: ocr + build: . 
"""Simple OCR API: upload an image, get recognized text with bounding boxes."""

import io
import os

import pytesseract
from dotenv import load_dotenv
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.concurrency import run_in_threadpool
from PIL import Image

# Load environment variables (reads a local .env file when one exists).
load_dotenv()

# The Tesseract executable path is read from the environment so the same code
# works across platforms; when the variable is unset, pytesseract's own
# default command is left untouched.
TESSERACT_CMD = os.getenv("TESSERACT_CMD")
if TESSERACT_CMD:
    pytesseract.pytesseract.tesseract_cmd = TESSERACT_CMD

app = FastAPI(title="Simple OCR API")


def _collect_words(data: dict) -> list:
    """Convert pytesseract's column-oriented result into per-item records.

    Args:
        data: The dict returned by ``pytesseract.image_to_data`` with
            ``output_type=Output.DICT``; the ``text``, ``conf``, ``left``,
            ``top``, ``width`` and ``height`` columns are read.

    Returns:
        A list of ``{"text", "confidence", "bbox"}`` dicts, one for every
        entry that has non-blank text and a confidence greater than 0.
    """
    columns = zip(
        data["text"],
        data["conf"],
        data["left"],
        data["top"],
        data["width"],
        data["height"],
    )
    words = []
    for raw_text, raw_conf, left, top, width, height in columns:
        text = raw_text.strip()
        if not text:
            continue
        # The confidence may arrive as an int, or as a string such as "-1" or
        # "96.06" depending on the Tesseract/pytesseract versions in use;
        # going through float() accepts all of these, whereas a plain int()
        # raises on the fractional form.
        confidence = int(float(raw_conf))
        if confidence <= 0:
            continue
        words.append({
            "text": text,
            "confidence": confidence,
            "bbox": {
                "left": left,
                "top": top,
                "width": width,
                "height": height,
            },
        })
    return words


@app.post("/ocr")
async def ocr(file: UploadFile = File(...)):
    """Run OCR on an uploaded image and return the text with coordinates.

    Args:
        file: The uploaded image file.

    Returns:
        A dict with ``text`` (all recognized items joined by single spaces)
        and ``details`` (one record per item with its text, confidence and
        bounding box).

    Raises:
        HTTPException: 400 when the upload cannot be decoded as an image.
    """
    # 1. Read the uploaded bytes and decode them as an image.
    image_bytes = await file.read()
    try:
        image = Image.open(io.BytesIO(image_bytes))
        # Image.open is lazy; force decoding now so a corrupt or truncated
        # file is rejected here rather than failing later inside the OCR call.
        image.load()
    except OSError as exc:
        # Pillow's UnidentifiedImageError is an OSError subclass, so this
        # also covers empty uploads and non-image files.
        raise HTTPException(
            status_code=400,
            detail="Uploaded file is not a readable image",
        ) from exc

    # 2. Run OCR (detailed data including coordinates). Tesseract is invoked
    #    synchronously, so it is pushed to the threadpool to keep the event
    #    loop free for other requests while recognition runs.
    data = await run_in_threadpool(
        pytesseract.image_to_data,
        image,
        lang="chi_sim+eng",
        config="--psm 6",
        output_type=pytesseract.Output.DICT,
    )

    # 3. Extract the text and coordinate information.
    results = _collect_words(data)

    # 4. Return the full result.
    return {
        "text": " ".join(item["text"] for item in results),
        "details": results,
    }