Initial commit
This commit is contained in:
7
.env.example
Normal file
7
.env.example
Normal file
@@ -0,0 +1,7 @@
|
||||
# OCR 环境配置文件模板
|
||||
# 复制此文件为 .env 并填写实际的配置
|
||||
|
||||
# Tesseract OCR 可执行文件路径
|
||||
# Windows 开发环境示例: C:\Program Files\Tesseract-OCR\tesseract.exe
|
||||
# Linux 生产环境示例: /usr/bin/tesseract
|
||||
TESSERACT_CMD=C:\Program Files\Tesseract-OCR\tesseract.exe
|
||||
27
.gitignore
vendored
Normal file
27
.gitignore
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
# 环境变量配置文件(包含敏感路径信息)
|
||||
.env
|
||||
|
||||
# Python 缓存
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
*.so
|
||||
|
||||
# 虚拟环境
|
||||
venv/
|
||||
env/
|
||||
ENV/
|
||||
|
||||
# IDE 配置
|
||||
.vscode/
|
||||
.idea/
|
||||
*.swp
|
||||
*.swo
|
||||
|
||||
# 测试生成的文件
|
||||
test_image*.png
|
||||
*.log
|
||||
|
||||
# 系统文件
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
33
Dockerfile
Normal file
33
Dockerfile
Normal file
@@ -0,0 +1,33 @@
|
||||
FROM ubuntu:22.04

# Keep apt from prompting during the build and make Python write its output
# unbuffered so log lines show up immediately.
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1

# 1. Install system dependencies: Tesseract, its language packs, Python and pip.
#    --no-install-recommends keeps optional "recommended" packages out of the
#    image; the apt lists are removed in the same layer to keep it small.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        tesseract-ocr \
        tesseract-ocr-chi-sim \
        tesseract-ocr-eng \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# 2. Set the working directory.
WORKDIR /app

# 3. Copy the dependency list first so the install layer below can be cached.
COPY requirements.txt .

# 4. Install the Python dependencies (python-dotenv included).
RUN pip3 install --no-cache-dir -r requirements.txt

# 5. Copy the application code.
COPY . .

# 6. Tell the application where the Tesseract executable lives in this image.
ENV TESSERACT_CMD=/usr/bin/tesseract

# 7. Start FastAPI with a single uvicorn process.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]
|
||||
|
||||
95
README.md
Normal file
95
README.md
Normal file
@@ -0,0 +1,95 @@
|
||||
# XOcr Api - 文字识别 API
|
||||
|
||||
基于 FastAPI + Tesseract OCR 的文字识别服务,支持中英文识别和文本坐标返回。
|
||||
|
||||
## 环境配置
|
||||
|
||||
### 开发环境 (Windows)
|
||||
|
||||
1. 安装依赖:
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
2. 配置 `.env` 文件(`.env` 不会提交到仓库,请先复制 `.env.example` 为 `.env`,再按本机 Tesseract 的实际安装路径修改):
|
||||
```env
|
||||
TESSERACT_CMD=C:\Program Files\Tesseract-OCR\tesseract.exe
|
||||
```
|
||||
|
||||
3. 启动服务:
|
||||
```bash
|
||||
uvicorn main:app --host 0.0.0.0 --port 8080 --reload
|
||||
```
|
||||
|
||||
### 生产环境 (Docker)
|
||||
|
||||
```bash
|
||||
# 构建镜像
|
||||
docker-compose build
|
||||
|
||||
# 启动服务
|
||||
docker-compose up -d
|
||||
|
||||
# 查看日志
|
||||
docker-compose logs -f
|
||||
```
|
||||
|
||||
## API 使用
|
||||
|
||||
### POST /ocr
|
||||
|
||||
上传图片进行文字识别,返回识别文本和坐标信息。
|
||||
|
||||
**请求:**
|
||||
```bash
|
||||
curl -X POST "http://localhost:8080/ocr" \
|
||||
-F "file=@test_image.png"
|
||||
```
|
||||
|
||||
**响应:**
|
||||
```json
|
||||
{
|
||||
"text": "完整识别文本",
|
||||
"details": [
|
||||
{
|
||||
"text": "文本内容",
|
||||
"confidence": 95,
|
||||
"bbox": {
|
||||
"left": 10,
|
||||
"top": 30,
|
||||
"width": 100,
|
||||
"height": 20
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## 测试
|
||||
|
||||
运行测试脚本:
|
||||
```bash
|
||||
python test_ocr_bbox.py
|
||||
```
|
||||
|
||||
会自动创建测试图片,调用 API,并生成带有边界框的可视化结果。
|
||||
|
||||
## 项目结构
|
||||
|
||||
```
|
||||
XOcr_Api/
|
||||
├── main.py # FastAPI 应用
|
||||
├── requirements.txt # Python 依赖
|
||||
├── Dockerfile # Docker 镜像构建
|
||||
├── docker-compose.yml # Docker Compose 配置
|
||||
├── .env # 开发环境配置(不提交)
|
||||
├── .env.example # 环境配置模板
|
||||
├── .gitignore # Git 忽略文件
|
||||
└── test_ocr_bbox.py # 测试脚本
|
||||
```
|
||||
|
||||
## 配置说明
|
||||
|
||||
- **TESSERACT_CMD**: Tesseract OCR 可执行文件路径
|
||||
- Windows: `C:\Program Files\Tesseract-OCR\tesseract.exe`
|
||||
- Linux: `/usr/bin/tesseract`(Docker 环境已自动配置)
|
||||
11
docker-compose.yml
Normal file
11
docker-compose.yml
Normal file
@@ -0,0 +1,11 @@
|
||||
# Compose definition for the OCR API service.
version: "3.9"

services:
  ocr:
    # Build from the Dockerfile in this directory and tag the result "ocr-api".
    build: .
    image: ocr-api
    container_name: ocr
    # Publish container port 8080 on host port 8080.
    ports:
      - "8080:8080"
    # Restart policy for the container.
    restart: always
|
||||
|
||||
56
main.py
Normal file
56
main.py
Normal file
@@ -0,0 +1,56 @@
|
||||
from fastapi import FastAPI, File, UploadFile
|
||||
from PIL import Image
|
||||
import pytesseract
|
||||
import io
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load environment variables (python-dotenv).
load_dotenv()

# Path of the Tesseract executable, taken from the environment so the same
# code can run on different platforms. When the variable is unset or empty,
# pytesseract's own default command is left untouched.
TESSERACT_CMD = os.environ.get("TESSERACT_CMD")
if TESSERACT_CMD:
    pytesseract.pytesseract.tesseract_cmd = TESSERACT_CMD

app = FastAPI(title="Simple OCR API")
|
||||
|
||||
@app.post("/ocr")
async def ocr(file: UploadFile = File(...)):
    """Run OCR on an uploaded image and return the text with bounding boxes.

    Args:
        file: The uploaded image, sent as the multipart form field ``file``.

    Returns:
        A dict with two keys:

        * ``text`` -- the kept fragments joined by single spaces.
        * ``details`` -- one entry per kept fragment with its ``text``, an
          integer ``confidence`` and a ``bbox`` dict holding ``left``,
          ``top``, ``width`` and ``height``.

    Raises:
        HTTPException: With status 400 when the upload cannot be opened as
            an image.
    """
    # Imported here so the handler is self-contained; fastapi is already a
    # dependency of this module.
    from fastapi import HTTPException

    # 1. Read the uploaded bytes and open them as an image.
    image_bytes = await file.read()
    try:
        image = Image.open(io.BytesIO(image_bytes))
    except OSError as exc:
        # Pillow signals unreadable image data with an OSError subclass;
        # report it as a client error instead of an unhandled 500.
        raise HTTPException(
            status_code=400,
            detail="Uploaded file is not a valid image",
        ) from exc

    # 2. Run OCR, asking for per-fragment data (text, confidence, position).
    #    The image is closed as soon as recognition has finished.
    with image:
        data = pytesseract.image_to_data(
            image,
            lang="chi_sim+eng",
            config="--psm 6",
            output_type=pytesseract.Output.DICT,
        )

    # 3. Collect the text and position of every usable fragment.
    results = []
    for i, raw_text in enumerate(data["text"]):
        text = raw_text.strip()
        # The confidence may arrive as a fractional value (e.g. "96.06");
        # int() on such a string raises ValueError, so convert via float.
        conf = int(float(data["conf"][i]))

        # Keep only entries that have text and a confidence above 0.
        if text and conf > 0:
            results.append({
                "text": text,
                "confidence": conf,
                "bbox": {
                    "left": data["left"][i],
                    "top": data["top"][i],
                    "width": data["width"][i],
                    "height": data["height"][i],
                },
            })

    # 4. Return the joined text together with the per-fragment details.
    return {
        "text": " ".join(r["text"] for r in results),
        "details": results,
    }
|
||||
|
||||
7
requirements.txt
Normal file
7
requirements.txt
Normal file
@@ -0,0 +1,7 @@
|
||||
fastapi
|
||||
uvicorn
|
||||
pillow
|
||||
pytesseract
|
||||
python-multipart
|
||||
requests
|
||||
python-dotenv
|
||||
Reference in New Issue
Block a user