Initial commit

2026-02-01 21:30:08 +08:00
commit 60e39b98ac
7 changed files with 236 additions and 0 deletions
--- a/.env.example
+++ b/.env.example
@@ -0,0 +1,7 @@
 # OCR 环境配置文件模板
 # 复制此文件为 .env 并填写实际的配置
 # Tesseract OCR 可执行文件路径
 # Windows 开发环境示例: C:\Program Files\Tesseract-OCR\tesseract.exe
 # Linux 生产环境示例: /usr/bin/tesseract
 TESSERACT_CMD=C:\Program Files\Tesseract-OCR\tesseract.exe
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,27 @@
 # 环境变量配置文件（包含敏感路径信息）
 .env
 # Python 缓存
 __pycache__/
 *.py[cod]
 *$py.class
 *.so
 # 虚拟环境
 venv/
 env/
 ENV/
 # IDE 配置
 .vscode/
 .idea/
 *.swp
 *.swo
 # 测试生成的文件
 test_image*.png
 *.log
 # 系统文件
 .DS_Store
 Thumbs.db
--- a/33
+++ b/33
@@ -0,0 +1,33 @@
 FROM ubuntu:22.04
 # Keep apt/debconf from prompting during the image build.
 ENV DEBIAN_FRONTEND=noninteractive
 # Do not buffer Python stdout/stderr, so output appears in container logs immediately.
 ENV PYTHONUNBUFFERED=1
 # 1. Install system dependencies: Tesseract, its Simplified Chinese and
 #    English language packs, and Python 3 with pip.
 RUN apt-get update && \
    apt-get install -y \
        tesseract-ocr \
        tesseract-ocr-chi-sim \
        tesseract-ocr-eng \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*
 # 2. Set the working directory.
 WORKDIR /app
 # 3. Copy the dependency list first so the pip layer below can be reused
 #    from the Docker cache when only application code changes.
 COPY requirements.txt .
 # 4. Install the Python dependencies (includes python-dotenv).
 RUN pip3 install --no-cache-dir -r requirements.txt
 # 5. Copy the application code.
 COPY . .
 # 6. Tesseract executable path used inside the container (TESSERACT_CMD).
 ENV TESSERACT_CMD=/usr/bin/tesseract
 # 7. Start FastAPI with uvicorn (single process) on port 8080.
 CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]
--- a/README.md
+++ b/README.md
@@ -0,0 +1,95 @@
 # XOcr Api - 文字识别 API
 基于 FastAPI + Tesseract OCR 的文字识别服务，支持中英文识别和文本坐标返回。
 ## 环境配置
 ### 开发环境 (Windows)
 1. 安装依赖：
 ```bash
 pip install -r requirements.txt
 ```
 2. 复制 `.env.example` 为 `.env`，并按本机 Tesseract 的实际安装路径修改（`.env` 不纳入版本控制，需自行创建）：
 ```env
 TESSERACT_CMD=C:\Program Files\Tesseract-OCR\tesseract.exe
 ```
 3. 启动服务：
 ```bash
 uvicorn main:app --host 0.0.0.0 --port 8080 --reload
 ```
 ### 生产环境 (Docker)
 ```bash
 # 构建镜像
 docker-compose build
 # 启动服务
 docker-compose up -d
 # 查看日志
 docker-compose logs -f
 ```
 ## API 使用
 ### POST /ocr
 上传图片进行文字识别，返回识别文本和坐标信息。
 **请求：**
 ```bash
 curl -X POST "http://localhost:8080/ocr" \
  -F "file=@test_image.png"
 ```
 **响应：**
 ```json
 {
  "text": "完整识别文本",
  "details": [
    {
      "text": "文本内容",
      "confidence": 95,
      "bbox": {
        "left": 10,
        "top": 30,
        "width": 100,
        "height": 20
      }
    }
  ]
 }
 ```
 ## 测试
 运行测试脚本：
 ```bash
 python test_ocr_bbox.py
 ```
 会自动创建测试图片，调用 API，并生成带有边界框的可视化结果。
 ## 项目结构
 ```
 XOcr_Api/
 ├── main.py              # FastAPI 应用
 ├── requirements.txt     # Python 依赖
 ├── Dockerfile          # Docker 镜像构建
 ├── docker-compose.yml  # Docker Compose 配置
 ├── .env                # 开发环境配置（不提交）
 ├── .env.example        # 环境配置模板
 ├── .gitignore          # Git 忽略文件
 └── test_ocr_bbox.py    # 测试脚本
 ```
 ## 配置说明
 - **TESSERACT_CMD**: Tesseract OCR 可执行文件路径
  - Windows: `C:\Program Files\Tesseract-OCR\tesseract.exe`
  - Linux: `/usr/bin/tesseract`（Docker 环境已自动配置）
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -0,0 +1,11 @@
 # Docker Compose definition for the OCR API: a single service named "ocr".
 version: "3.9"
 services:
  ocr:
    # Name given to the built image and to the running container.
    image: ocr-api
    container_name: ocr
    # Build the image from the Dockerfile in this directory.
    build: .
    ports:
      # host port : container port (uvicorn listens on 8080 in the container)
      - "8080:8080"
    # Restart the container automatically whenever it stops.
    restart: always
--- a/main.py
+++ b/main.py
@@ -0,0 +1,56 @@
 from fastapi import FastAPI, File, UploadFile
 from PIL import Image
 import pytesseract
 import io
 import os
 from dotenv import load_dotenv
 # Pull settings from a local .env file into the process environment.
 load_dotenv()
 # The Tesseract executable location comes from the environment so the same
 # code runs on different platforms; when the variable is unset or empty,
 # pytesseract's own default command is left untouched.
 TESSERACT_CMD = os.environ.get("TESSERACT_CMD")
 if TESSERACT_CMD:
    pytesseract.pytesseract.tesseract_cmd = TESSERACT_CMD
 # Application object served by uvicorn as "main:app".
 app = FastAPI(title="Simple OCR API")
@app.post("/ocr")
async def ocr(file: UploadFile = File(...)):
    """Recognize text in an uploaded image and return it with bounding boxes.

    Args:
        file: Image uploaded as the multipart form field ``file``.

    Returns:
        A dict with ``text`` (all kept fragments joined by single spaces) and
        ``details`` (one entry per kept fragment holding ``text``,
        ``confidence`` and a ``bbox`` of ``left``/``top``/``width``/``height``).

    Raises:
        HTTPException: 400 when the upload is empty or cannot be decoded as
            an image by Pillow.
    """
    # Imported locally so the handler is self-contained with respect to the
    # module-level import list.
    from fastapi import HTTPException

    # 1. Read the upload and decode it; bad input is a client error, not a 500.
    image_bytes = await file.read()
    if not image_bytes:
        raise HTTPException(status_code=400, detail="Uploaded file is empty")
    try:
        image = Image.open(io.BytesIO(image_bytes))
        # Image.open is lazy; load() forces decoding so corrupt or truncated
        # data is reported here rather than inside the OCR call.
        image.load()
    except OSError as exc:  # PIL.UnidentifiedImageError subclasses OSError
        raise HTTPException(
            status_code=400,
            detail="Uploaded file is not a readable image",
        ) from exc

    # 2. Run OCR and collect per-fragment data with coordinates.
    # NOTE(review): this call blocks the event loop while Tesseract runs;
    # consider offloading it to a worker thread if concurrency matters.
    with image:
        data = pytesseract.image_to_data(
            image,
            lang="chi_sim+eng",
            config="--psm 6",
            output_type=pytesseract.Output.DICT,
        )

    # 3-4. Filter the raw columns and build the response payload.
    return _build_ocr_response(data)


def _build_ocr_response(data):
    """Turn pytesseract's column-oriented DICT output into the API payload."""
    results = []
    # The dict may lack columns entirely when Tesseract produced no data rows
    # (presumably for blank images; verify against the pytesseract version in use).
    texts = data.get("text", [])
    for i, raw_text in enumerate(texts):
        text = raw_text.strip()
        if not text:
            continue
        try:
            # Confidence may arrive as an int, a float, or a float-formatted
            # string such as "96.5"; int() alone fails on the latter.
            conf = int(float(data["conf"][i]))
        except (TypeError, ValueError):
            continue
        # Keep only fragments with text and a confidence greater than 0.
        if conf <= 0:
            continue
        results.append({
            "text": text,
            "confidence": conf,
            "bbox": {
                "left": data["left"][i],
                "top": data["top"][i],
                "width": data["width"][i],
                "height": data["height"][i],
            },
        })
    return {
        "text": " ".join(r["text"] for r in results),
        "details": results,
    }
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,7 @@
 fastapi
 uvicorn
 pillow
 pytesseract
 python-multipart
 requests
 python-dotenv