From 446d8e30c1dfe0e2d0638d3ff7fd079971c4f108 Mon Sep 17 00:00:00 2001
From: mor
Date: Tue, 23 Sep 2025 11:41:42 +0530
Subject: [PATCH 1/5] Add Replicate support with cog.yaml and predict.py

---
 cog.yaml   |  16 ++++++++
 predict.py | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 119 insertions(+)
 create mode 100644 cog.yaml
 create mode 100644 predict.py

diff --git a/cog.yaml b/cog.yaml
new file mode 100644
index 0000000..b1c8902
--- /dev/null
+++ b/cog.yaml
@@ -0,0 +1,16 @@
+build:
+  python_version: "3.10"
+  system_packages:
+    - "libgl1"
+    - "libglib2.0-0"
+  python_packages:
+    - torch==2.0.1
+    - torchvision==0.15.2
+    - opencv-python
+    - matplotlib
+    - scikit-image
+    - scipy
+    - Pillow
+    - numpy
+
+predict: "predict.py:Predictor"
diff --git a/predict.py b/predict.py
new file mode 100644
index 0000000..a8d1bad
--- /dev/null
+++ b/predict.py
@@ -0,0 +1,103 @@
+import os
+import shutil
+import tempfile
+import urllib.request
+
+import cv2
+import torch
+import numpy as np
+from cog import BasePredictor, Input, Path
+
+import craft_utils
+import imgproc
+from craft import CRAFT
+
+WEIGHTS_PATH = "craft_mlt_25k.pth"
+WEIGHTS_URL = "https://github.com/clovaai/CRAFT-pytorch/releases/download/1.0/craft_mlt_25k.pth"
+DOWNLOAD_TIMEOUT_S = 60
+
+
+def _download(url, dest):
+    """Fetch *url* into *dest* without ever leaving a partial file at *dest*.
+
+    urlopen raises on HTTP error statuses, so an error page is never saved as
+    if it were the checkpoint. Data is streamed to a sibling ".part" file and
+    renamed into place only after the transfer completes.
+    """
+    partial = dest + ".part"
+    try:
+        with urllib.request.urlopen(url, timeout=DOWNLOAD_TIMEOUT_S) as resp:
+            with open(partial, "wb") as f:
+                shutil.copyfileobj(resp, f)
+        os.replace(partial, dest)
+    finally:
+        # Only present if the transfer failed before the rename.
+        if os.path.exists(partial):
+            os.remove(partial)
+
+
+class Predictor(BasePredictor):
+    """Cog predictor that runs the CRAFT network on one image, on CPU."""
+
+    def setup(self):
+        """Build the network and load its weights, downloading them if absent."""
+        self.net = CRAFT()
+        if not os.path.exists(WEIGHTS_PATH):
+            _download(WEIGHTS_URL, WEIGHTS_PATH)
+
+        # NOTE(review): torch.load unpickles the file; the checkpoint comes
+        # from the network, so consider weights_only=True or a checksum.
+        # NOTE(review): if the checkpoint keys carry a "module." prefix
+        # (saved from nn.DataParallel), load_state_dict will raise -- confirm.
+        state = torch.load(WEIGHTS_PATH, map_location="cpu")
+        self.net.load_state_dict(state)
+        self.net.eval()
+
+    def predict(
+        self,
+        image: Path = Input(description="Input image"),
+        text_threshold: float = Input(default=0.7, description="Text confidence threshold"),
+        link_threshold: float = Input(default=0.4, description="Link confidence threshold"),
+        low_text: float = Input(default=0.4, description="Low text threshold"),
+    ) -> dict:
+        """Detect text in *image* and return the annotated image plus boxes.
+
+        Returns a dict with "output_image" (path of a JPEG with the boxes
+        drawn in green) and "boxes" (one nested list of floats per box).
+
+        Raises:
+            RuntimeError: if the annotated image cannot be written.
+        """
+        img = imgproc.loadImage(str(image))
+
+        # NOTE(review): craft_utils is not visible here -- confirm it exposes
+        # test_net with these keywords and a 3-tuple return.
+        with torch.no_grad():  # inference only; skip autograd bookkeeping
+            bboxes, _polys, _score_text = craft_utils.test_net(
+                self.net,
+                img,
+                text_threshold=text_threshold,
+                link_threshold=link_threshold,
+                low_text=low_text,
+                cuda=False,
+            )
+
+        vis = img.copy()
+        for box in bboxes:
+            pts = np.asarray(box).astype(np.int32).reshape((-1, 1, 2))
+            cv2.polylines(vis, [pts], True, (0, 255, 0), 2)
+
+        # A per-call file instead of a fixed /tmp/output.jpg, so concurrent
+        # or back-to-back predictions cannot overwrite each other's output.
+        fd, out_path = tempfile.mkstemp(suffix=".jpg")
+        os.close(fd)
+        # NOTE(review): cv2.imwrite expects BGR; if imgproc.loadImage yields
+        # RGB the saved colours are swapped -- confirm.
+        # cv2.imwrite reports failure by returning False, not by raising.
+        if not cv2.imwrite(out_path, vis):
+            raise RuntimeError(f"could not write annotated image to {out_path}")
+
+        return {
+            "output_image": Path(out_path),
+            "boxes": [np.asarray(box, dtype=float).tolist() for box in bboxes],
+        }
From b32c9ab8c9a9723965ee9a2a447a22c53c3e9207 Mon Sep 17 00:00:00 2001
From: mor
Date: Tue, 23 Sep 2025 12:57:58 +0530
Subject: [PATCH 2/5] Add Replicate push workflow

---
 .github/workflows/replicate-push.yml | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 .github/workflows/replicate-push.yml

diff --git a/.github/workflows/replicate-push.yml b/.github/workflows/replicate-push.yml
new file mode 100644
index 0000000..40db636
--- /dev/null
+++ b/.github/workflows/replicate-push.yml
@@ -0,0 +1,38 @@
+name: Cog Safe Push
+
+on:
+  workflow_dispatch:
+    inputs:
+      model:
+        description: "owner/model-name (Replicate)"
+        required: true
+
+jobs:
+  push:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    steps:
+      - uses: actions/checkout@v4
+
+      # Install Docker Buildx (recommended for Cog builds)
+      - uses: docker/setup-buildx-action@v3
+
+      # Install Cog (CLI)
+      - name: Install Cog
+        run: |
+          curl -o /usr/local/bin/cog -L https://github.com/replicate/cog/releases/latest/download/cog_`uname -s`_`uname -m`
+          chmod +x /usr/local/bin/cog
+          cog --version
+
+      # (Optional) Verify weights exist
+      - name: List weights
+        run: ls -lah weights || true
+
+      # Push safely to Replicate (CPU test by default; for GPU, see note below)
+      - name: Cog Safe Push
+        env:
+          REPLICATE_API_TOKEN: ${{ secrets.REPLICATE_API_TOKEN }}
+        run: |
+          pip install cog-safe-push
+
cog-safe-push --test-hardware=cpu ${{ github.event.inputs.model }} From 63032a91976f7f8c596c29316a5a7de0b8a6f78e Mon Sep 17 00:00:00 2001 From: mor Date: Tue, 23 Sep 2025 15:34:14 +0530 Subject: [PATCH 3/5] Add Replicate push workflow --- .github/workflows/replicate-push.yml | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/.github/workflows/replicate-push.yml b/.github/workflows/replicate-push.yml index 40db636..fbb24ae 100644 --- a/.github/workflows/replicate-push.yml +++ b/.github/workflows/replicate-push.yml @@ -6,6 +6,7 @@ on: model: description: "owner/model-name (Replicate)" required: true + type: string jobs: push: @@ -15,24 +16,30 @@ jobs: steps: - uses: actions/checkout@v4 - # Install Docker Buildx (recommended for Cog builds) - uses: docker/setup-buildx-action@v3 # Install Cog (CLI) - name: Install Cog run: | - curl -o /usr/local/bin/cog -L https://github.com/replicate/cog/releases/latest/download/cog_`uname -s`_`uname -m` - chmod +x /usr/local/bin/cog + sudo curl -o /usr/local/bin/cog -L https://github.com/replicate/cog/releases/latest/download/cog_`uname -s`_`uname -m` + sudo chmod +x /usr/local/bin/cog cog --version + # Authenticate Cog (required to push) + - name: Cog login + run: | + echo "${{ secrets.REPLICATE_CLI_AUTH_TOKEN }}" | cog login --token-stdin + # (Optional) Verify weights exist - name: List weights run: ls -lah weights || true - # Push safely to Replicate (CPU test by default; for GPU, see note below) + # Push safely to Replicate (runs a quick test on CPU) - name: Cog Safe Push env: REPLICATE_API_TOKEN: ${{ secrets.REPLICATE_API_TOKEN }} run: | - pip install cog-safe-push - cog-safe-push --test-hardware=cpu ${{ github.event.inputs.model }} + pip install --upgrade pip + pip install "git+https://github.com/replicate/cog-safe-push.git" + # If you don't set ANTHROPIC_API_KEY, add --no-compare-outputs + cog-safe-push --test-hardware=cpu --no-compare-outputs "${{ inputs.model }}" From 
2f0ba8faafb718c70ff8bc658ecbd4d88e0bd93c Mon Sep 17 00:00:00 2001 From: mor Date: Tue, 23 Sep 2025 16:02:46 +0530 Subject: [PATCH 4/5] Add Replicate push workflow --- .gitignore | 2 +- README.md | 62 ++++++++++++++++++++++++++++++++++-------------------- 2 files changed, 40 insertions(+), 24 deletions(-) diff --git a/.gitignore b/.gitignore index 8bd0b28..307cb62 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,4 @@ *.pkl *.pth result* -weights* \ No newline at end of file +# weights* \ No newline at end of file diff --git a/README.md b/README.md index 8c185eb..9858958 100755 --- a/README.md +++ b/README.md @@ -1,74 +1,83 @@ ## CRAFT: Character-Region Awareness For Text detection + Official Pytorch implementation of CRAFT text detector | [Paper](https://arxiv.org/abs/1904.01941) | [Pretrained Model](https://drive.google.com/open?id=1Jk4eGD7crsqCCg9C9VjCLkMN3ze8kutZ) | [Supplementary](https://youtu.be/HI8MzpY8KMI) **[Youngmin Baek](mailto:youngmin.baek@navercorp.com), Bado Lee, Dongyoon Han, Sangdoo Yun, Hwalsuk Lee.** - + Clova AI Research, NAVER Corp. ### Sample Results ### Overview -PyTorch implementation for CRAFT text detector that effectively detect text area by exploring each character region and affinity between characters. The bounding box of texts are obtained by simply finding minimum bounding rectangles on binary map after thresholding character region and affinity scores. + +PyTorch implementation for CRAFT text detector that effectively detect text area by exploring each character region and affinity between characters. The bounding box of texts are obtained by simply finding minimum bounding rectangles on binary map after thresholding character region and affinity scores. 
teaser ## Updates + **13 Jun, 2019**: Initial update **20 Jul, 2019**: Added post-processing for polygon result **28 Sep, 2019**: Added the trained model on IC15 and the link refiner - ## Getting started + ### Install dependencies + #### Requirements + - PyTorch>=0.4.1 - torchvision>=0.2.1 - opencv-python>=3.4.2 - check requiremtns.txt + ``` pip install -r requirements.txt ``` ### Training -The code for training is not included in this repository, and we cannot release the full training code for IP reason. +The code for training is not included in this repository, and we cannot release the full training code for IP reason. ### Test instruction using pretrained model + - Download the trained models - - *Model name* | *Used datasets* | *Languages* | *Purpose* | *Model Link* | - | :--- | :--- | :--- | :--- | :--- | -General | SynthText, IC13, IC17 | Eng + MLT | For general purpose | [Click](https://drive.google.com/open?id=1Jk4eGD7crsqCCg9C9VjCLkMN3ze8kutZ) -IC15 | SynthText, IC15 | Eng | For IC15 only | [Click](https://drive.google.com/open?id=1i2R7UIUqmkUtF0jv_3MXTqmQ_9wuAnLf) -LinkRefiner | CTW1500 | - | Used with the General Model | [Click](https://drive.google.com/open?id=1XSaFwBkOaFOdtk4Ane3DFyJGPRw6v5bO) + | _Model name_ | _Used datasets_ | _Languages_ | _Purpose_ | _Model Link_ | + | :----------- | :-------------------- | :---------- | :-------------------------- | :-------------------------------------------------------------------------- | + | General | SynthText, IC13, IC17 | Eng + MLT | For general purpose | [Click](https://drive.google.com/open?id=1Jk4eGD7crsqCCg9C9VjCLkMN3ze8kutZ) | + | IC15 | SynthText, IC15 | Eng | For IC15 only | [Click](https://drive.google.com/open?id=1i2R7UIUqmkUtF0jv_3MXTqmQ_9wuAnLf) | + | LinkRefiner | CTW1500 | - | Used with the General Model | [Click](https://drive.google.com/open?id=1XSaFwBkOaFOdtk4Ane3DFyJGPRw6v5bO) | * Run with pretrained model -``` (with python 3.7) + +```(with python 3.7) python test.py 
--trained_model=[weightfile] --test_folder=[folder path to test images] ``` The result image and socre maps will be saved to `./result` by default. ### Arguments -* `--trained_model`: pretrained model -* `--text_threshold`: text confidence threshold -* `--low_text`: text low-bound score -* `--link_threshold`: link confidence threshold -* `--cuda`: use cuda for inference (default:True) -* `--canvas_size`: max image size for inference -* `--mag_ratio`: image magnification ratio -* `--poly`: enable polygon type result -* `--show_time`: show processing time -* `--test_folder`: folder path to input images -* `--refine`: use link refiner for sentense-level dataset -* `--refiner_model`: pretrained refiner model +- `--trained_model`: pretrained model +- `--text_threshold`: text confidence threshold +- `--low_text`: text low-bound score +- `--link_threshold`: link confidence threshold +- `--cuda`: use cuda for inference (default:True) +- `--canvas_size`: max image size for inference +- `--mag_ratio`: image magnification ratio +- `--poly`: enable polygon type result +- `--show_time`: show processing time +- `--test_folder`: folder path to input images +- `--refine`: use link refiner for sentence-level dataset +- `--refiner_model`: pretrained refiner model ## Links + - WebDemo : https://demo.ocr.clova.ai/ - Repo of recognition : https://github.com/clovaai/deep-text-recognition-benchmark ## Citation + ``` @inproceedings{baek2019character, title={Character Region Awareness for Text Detection}, @@ -80,6 +89,7 @@ The result image and socre maps will be saved to `./result` by default. ``` ## License + ``` Copyright (c) 2019-present NAVER Corp. @@ -101,3 +111,9 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
``` + +--- + +cd D:\python\CRAFT-pytorch +python -m venv venv +.\venv\Scripts\Activate.ps1 From ec2830d0f8428dbb3340fff7992876fc46a7b4c1 Mon Sep 17 00:00:00 2001 From: mor Date: Tue, 23 Sep 2025 16:18:17 +0530 Subject: [PATCH 5/5] Add Replicate push workflow --- .github/workflows/replicate-push.yml | 38 ++++++---------------------- 1 file changed, 8 insertions(+), 30 deletions(-) diff --git a/.github/workflows/replicate-push.yml b/.github/workflows/replicate-push.yml index fbb24ae..aeee44f 100644 --- a/.github/workflows/replicate-push.yml +++ b/.github/workflows/replicate-push.yml @@ -1,9 +1,9 @@ -name: Cog Safe Push +name: Push to Replicate on: workflow_dispatch: inputs: - model: + model_name: description: "owner/model-name (Replicate)" required: true type: string @@ -11,35 +11,13 @@ on: jobs: push: runs-on: ubuntu-latest - permissions: - contents: read steps: - uses: actions/checkout@v4 - - uses: docker/setup-buildx-action@v3 + - name: Setup Cog + uses: replicate/setup-cog@v2 + with: + token: ${{ secrets.REPLICATE_API_TOKEN }} - # Install Cog (CLI) - - name: Install Cog - run: | - sudo curl -o /usr/local/bin/cog -L https://github.com/replicate/cog/releases/latest/download/cog_`uname -s`_`uname -m` - sudo chmod +x /usr/local/bin/cog - cog --version - - # Authenticate Cog (required to push) - - name: Cog login - run: | - echo "${{ secrets.REPLICATE_CLI_AUTH_TOKEN }}" | cog login --token-stdin - - # (Optional) Verify weights exist - - name: List weights - run: ls -lah weights || true - - # Push safely to Replicate (runs a quick test on CPU) - - name: Cog Safe Push - env: - REPLICATE_API_TOKEN: ${{ secrets.REPLICATE_API_TOKEN }} - run: | - pip install --upgrade pip - pip install "git+https://github.com/replicate/cog-safe-push.git" - # If you don't set ANTHROPIC_API_KEY, add --no-compare-outputs - cog-safe-push --test-hardware=cpu --no-compare-outputs "${{ inputs.model }}" + - name: Push + run: cog push r8.im/${{ inputs.model_name }}