diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..bb0224d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.github/
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..bf32614
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,69 @@
+FROM nvidia/cuda:12.6.3-cudnn-devel-ubuntu22.04
+
+# Make every RUN fail when any stage of a pipeline fails; without this a
+# failed download in `curl ... | python3.10` below would go unnoticed.
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+# Build-time only: keeps apt from waiting on interactive prompts (e.g. tzdata)
+# without leaving the setting in the runtime environment.
+ARG DEBIAN_FRONTEND=noninteractive
+
+# Install system dependencies
+# NOTE(review): Ubuntu 22.04 already ships Python 3.10 in its own archive, so
+# the deadsnakes PPA (and software-properties-common) looks unnecessary here;
+# confirm and drop it in a follow-up.
+RUN apt-get update && apt-get install -y \
+        build-essential \
+        curl \
+        git \
+        software-properties-common \
+        wget \
+    && add-apt-repository ppa:deadsnakes/ppa -y \
+    && apt-get update && apt-get install -y \
+        ffmpeg \
+        git-lfs \
+        python3.10 \
+        python3.10-dev \
+        python3.10-venv \
+    && ln -sf /usr/bin/python3.10 /usr/bin/python \
+    && curl -fsSL https://bootstrap.pypa.io/get-pip.py | python3.10 \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install the CUDA 12.6 PyTorch wheels before fetching the source so this large
+# layer stays cached when only the repository step changes.
+RUN pip install --no-cache-dir \
+        torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 \
+        --index-url https://download.pytorch.org/whl/cu126
+
+# Set working directory
+WORKDIR /app
+
+# Clone the ThinkSound repository (shallow: history is not needed at runtime)
+RUN git clone --depth 1 https://github.com/liuhuadai/ThinkSound.git .
+
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Create a non-root user, hand it the application tree and make the demo
+# script executable, all while still root and in a single layer.
+RUN useradd -m -u 1000 thinksound \
+    && chmod +x scripts/demo.sh \
+    && chown -R thinksound:thinksound /app
+USER thinksound
+
+# Gradio binds to 127.0.0.1 unless told otherwise, which a published port
+# (`-p 7860:7860`) cannot reach; this env var is ignored if app.py passes
+# its own server_name.
+ENV GRADIO_SERVER_NAME=0.0.0.0
+
+# Expose port for Gradio web interface
+EXPOSE 7860
+
+# Set default command to launch the web interface
+CMD ["python", "app.py"]
+
+# Alternative commands (uncomment as needed):
+# For interactive bash session:
+# CMD ["/bin/bash"]
+
+# For running demo script (requires arguments):
+# ENTRYPOINT ["./scripts/demo.sh"]
diff --git a/README.md b/README.md
index e45bc56..b31c544 100644
--- a/README.md
+++ b/README.md
@@ -102,6 +102,72 @@ git clone https://huggingface.co/liuhuadai/ThinkSound ckpts
 
 ### ▶️ Run the Demo
 
+#### **Docker for WSL/Ubuntu
+
+Use to run this with own workspace that already clone this repo + models
+
+Prerequisite
+
+1. Download all required models. Warning. 
These downloads are large. + ```bash + +sudo apt install git-lfs + +git clone https://huggingface.co/facebook/metaclip-h14-fullcc2.5b +git clone https://huggingface.co/google/t5-v1_1-xl +git clone https://huggingface.co/liuhuadai/ThinkSound ckpts +``` + +2. Move all the models to the root of this repository. + +Pull the prebuilt Docker image + +1. If your GPU supports CUDA 12.6.x, you can pull this image: + +```bash +docker pull sasuketaichou/sajenakcube:thinksound +``` + +Note: If you pull this image, you can skip the "Build locally" step and go straight to the "Run the container" step. + +Build locally + +Note: Check which NVIDIA CUDA version your device supports, and change the `FROM` line of the Dockerfile to the CUDA base image that matches it. + +1. Run at the root of this repository: + +```bash +docker build -t thinksound:latest . +``` + +Run the container + +1. Attach (mount) your local ThinkSound workspace, together with the models you just downloaded, to the container. (This is done in start_docker.sh.) + +2. To attach the ThinkSound folder, first go to its parent directory: + +```bash +cd .. +ls ## make sure ThinkSound folder is visible +``` + +3. Run the script + +If you pulled the prebuilt Docker image: + +```bash +docker run --gpus all -it -v $(pwd)/ThinkSound:/app --rm -p 7860:7860 --net=host sasuketaichou/sajenakcube:thinksound +``` + +If you built the image locally: + +```bash +docker run --gpus all -it -v $(pwd)/ThinkSound:/app --rm -p 7860:7860 --net=host thinksound:latest +``` + +Test in a browser at `localhost:7860`. + #### **Linux/macOS** ```bash diff --git a/extract_latents.py b/extract_latents.py index 145b8e9..659c533 100644 --- a/extract_latents.py +++ b/extract_latents.py @@ -52,7 +52,7 @@ def main(args): dataloader = DataLoader( dataset, batch_size=2, - num_workers=2, + # num_workers=2, pin_memory=False, drop_last=False, collate_fn=error_avoidance_collate