
Commit 9b28ee2

docs: update for finetunes
1 parent 6ee8a4d commit 9b28ee2

File tree: 5 files changed, +310 −2 lines changed

README.md

Lines changed: 28 additions & 2 deletions (hunk @@ -510,11 +510,37 @@)

## Step 3: Data engineering and model evolution around intent

### IDE instruction design and evolution

#### Template instructions

Below is the streamlined prompt example used in AutoDev:

Write unit test for following code.

${context.testFramework}
${context.coreFramework}
${context.testSpec}

```${context.language}
${context.related_model}
${context.selection}
```

It fills in (a rendering sketch follows this list):

- Tech-stack context
- Testing tech-stack context
- Input and output information of the code block (class, function)
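The `${context.*}` placeholders are filled from the IDE before the prompt is sent to the model. Below is a minimal sketch of that interpolation in Python; the `PromptContext` class and its field comments are hypothetical stand-ins for what an IDE plugin would collect, not AutoDev's actual implementation:

````python
from dataclasses import dataclass
from string import Template

@dataclass
class PromptContext:
    """Hypothetical container mirroring the ${context.*} placeholders above."""
    testFramework: str   # e.g. "JUnit 5"
    coreFramework: str   # e.g. "Spring Boot"
    testSpec: str        # project-specific testing conventions
    language: str        # fence tag for the code block, e.g. "java"
    related_model: str   # signatures of related classes/functions
    selection: str       # the code the user selected in the editor

PROMPT = Template("""Write unit test for following code.

$testFramework
$coreFramework
$testSpec

```$language
$related_model
$selection
```
""")

def render(ctx: PromptContext) -> str:
    # Substitute every placeholder with the IDE-derived context.
    return PROMPT.substitute(vars(ctx))
````

The template itself is deliberately thin; the engineering effort sits in gathering accurate values for these variables from the project.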
### High-quality dataset generation

- Unified prompts: use one set of prompts across the tooling, fine-tuning, and evaluation layers.
- Code-quality pipeline: screen samples for code complexity, code smells, test smells, API design smells, and so on (a sketch follows this list).
- Extensible quality thresholds: custom rules, custom thresholds, custom quality types, etc.
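As an illustration of the code-quality pipeline idea, here is a hedged sketch of a quality gate that screens candidate training samples against configurable rules; the thresholds and checks are illustrative assumptions, not the project's actual rules:

```python
from dataclasses import dataclass, field
from typing import Callable, Dict, List

@dataclass
class QualityGate:
    # Illustrative default; real pipelines would call complexity analyzers,
    # smell detectors, and similar tools here.
    max_lines: int = 80
    checks: Dict[str, Callable[[str], bool]] = field(default_factory=dict)

    def passes(self, code: str) -> bool:
        if len(code.splitlines()) > self.max_lines:
            return False
        # Extensible, custom rules: every registered check must pass.
        return all(check(code) for check in self.checks.values())

def filter_samples(samples: List[dict], gate: QualityGate) -> List[dict]:
    """Keep only the samples whose code survives every quality check."""
    return [s for s in samples if gate.passes(s["code"])]

# Example custom rule: reject code that still carries TODO markers.
gate = QualityGate()
gate.checks["no_todo"] = lambda code: "TODO" not in code
```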
## Appendix: Related resources

### Open-source AI-assisted tools

code/finetune/finetune.ipynb

Lines changed: 129 additions & 0 deletions
```json
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3bf0068d-9f6f-4c61-bc08-47b9594254d2",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "!git clone https://github.com/deepseek-ai/DeepSeek-Coder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "14a9e9c5-07b0-4d82-9dde-06559bb14ea2",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "!pip install deepspeed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# if you want to clone without large files – just their pointers\n",
    "!apt install git-lfs"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "8fd2eb721d14953a"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f8817387-03de-4fb7-9d6f-63d41e1db328",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Make sure you have git-lfs installed (https://git-lfs.com)\n",
    "!git lfs install\n",
    "!git clone https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "!cd DeepSeek-Coder/finetune && pip install -r requirements.txt"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "8ee1f87d90fc2a8e"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f42cbcf2-09a1-4560-9a61-33a16ad220ea",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "DATA_PATH=\"/openbayes/home/summary.jsonl\"\n",
    "OUTPUT_PATH=\"/openbayes/home/output\"\n",
    "\n",
    "# NotImplementedError: Using RTX 3090 or 4000 series doesn't support faster communication broadband via P2P or IB. Please\n",
    "# set `NCCL_P2P_DISABLE=\"1\"` and `NCCL_IB_DISABLE=\"1\"` or use `accelerate launch` which will do this automatically.\n",
    "#\n",
    "# !NCCL_P2P_DISABLE=1\n",
    "# !NCCL_IB_DISABLE=1\n",
    "\n",
    "MODEL_PATH=\"/openbayes/home/deepseek-coder-6.7b-instruct\"\n",
    "\n",
    "!cd DeepSeek-Coder/finetune && deepspeed finetune_deepseekcoder.py \\\n",
    "    --model_name_or_path $MODEL_PATH \\\n",
    "    --data_path $DATA_PATH \\\n",
    "    --output_dir $OUTPUT_PATH \\\n",
    "    --num_train_epochs 1 \\\n",
    "    --model_max_length 768 \\\n",
    "    --per_device_train_batch_size 16 \\\n",
    "    --per_device_eval_batch_size 1 \\\n",
    "    --gradient_accumulation_steps 4 \\\n",
    "    --evaluation_strategy \"no\" \\\n",
    "    --save_strategy \"no\" \\\n",
    "    --save_steps 50 \\\n",
    "    --save_total_limit 10 \\\n",
    "    --learning_rate 4e-5 \\\n",
    "    --warmup_steps 10 \\\n",
    "    --logging_steps 1 \\\n",
    "    --lr_scheduler_type \"cosine\" \\\n",
    "    --gradient_checkpointing True \\\n",
    "    --report_to \"tensorboard\" \\\n",
    "    --deepspeed configs/ds_config_zero3.json \\\n",
    "    --bf16 True"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
```
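The notebook assumes the training data already exists at `DATA_PATH`. DeepSeek-Coder's `finetune_deepseekcoder.py` consumes JSONL in which each line carries an `instruction` and an `output` field, per that repo's fine-tuning instructions. A minimal sketch for writing `summary.jsonl` in that shape (the sample record is a placeholder; real samples would come out of the quality pipeline described in the README):

```python
import json

# Placeholder record; real data would be generated by the dataset pipeline.
samples = [
    {
        "instruction": "Write unit test for following code.\n\n```java\nint add(int a, int b) { return a + b; }\n```",
        "output": "```java\n@Test\nvoid shouldAdd() { assertEquals(3, add(1, 2)); }\n```",
    },
]

# One JSON object per line, as expected by finetune_deepseekcoder.py.
with open("/openbayes/home/summary.jsonl", "w", encoding="utf-8") as f:
    for sample in samples:
        f.write(json.dumps(sample, ensure_ascii=False) + "\n")
```

With `--per_device_train_batch_size 16` and `--gradient_accumulation_steps 4`, the effective batch size is 64 per device; and since `--save_strategy "no"` is set, the periodic `--save_steps`/`--save_total_limit` flags have no effect.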

code/ide/.gitkeep

Whitespace-only changes.

code/server/requirements.txt

Lines changed: 13 additions & 0 deletions
```
accelerate==0.23.0
bitsandbytes==0.41.1
gradio==3.48.0
protobuf==3.20.3
# scipy==1.11.2
sentencepiece==0.1.99
spaces==0.16.1
torch==2.0.0
transformers==4.34.0
fastapi
uvicorn
# asyncio is part of the Python 3 standard library; the PyPI package of the
# same name is a stale backport that can shadow it, so it is not pinned here.
async_timeout
```

code/server/server-python38.py

Lines changed: 140 additions & 0 deletions
```python
import asyncio
import os
from threading import Thread
from typing import List

import async_timeout
import requests
import torch
import uvicorn
from fastapi import FastAPI, HTTPException, Request, Response
from fastapi.exceptions import RequestValidationError
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from starlette import status
from starlette.responses import JSONResponse
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

MAX_MAX_NEW_TOKENS = 4096
DEFAULT_MAX_NEW_TOKENS = 1024
total_count = 0
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))

if torch.cuda.is_available():
    model_id = "/openbayes/input/input0/"
    model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    tokenizer.use_default_system_prompt = False


class Message(BaseModel):
    role: str
    content: str


class MessageInResponseChat(BaseModel):
    message: Message


class ChatResponse(BaseModel):
    choices: List[MessageInResponseChat]
    model: str


class SimpleOpenAIBody(BaseModel):
    messages: List[Message]
    temperature: float
    stream: bool


GENERATION_TIMEOUT_SEC = 480


async def stream_generate(
        chat_history: List[Message],
        max_new_tokens: int = 512,
        temperature: float = 0.1,
        top_p: float = 0.9,
        top_k: int = 50,
        repetition_penalty: float = 1.0,
):
    async with async_timeout.timeout(GENERATION_TIMEOUT_SEC):
        try:
            global total_count
            total_count += 1
            if total_count % 50 == 0:
                os.system("nvidia-smi")

            # apply_chat_template expects plain dicts, not pydantic models.
            conversation = [message.model_dump() for message in chat_history]

            input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
            if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
                input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
            input_ids = input_ids.to(model.device)

            streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
            generate_kwargs = dict(
                {"input_ids": input_ids},
                streamer=streamer,
                max_new_tokens=max_new_tokens,
                do_sample=False,
                top_p=top_p,
                top_k=top_k,
                num_beams=1,
                temperature=temperature,
                repetition_penalty=repetition_penalty,
                eos_token_id=32021,  # <|EOT|> for deepseek-coder instruct models
            )
            # Run generation in a background thread; the streamer yields text
            # chunks as they are produced.
            t = Thread(target=model.generate, kwargs=generate_kwargs)
            t.start()

            result = ""
            outputs = []
            for text in streamer:
                outputs.append(text)
                result = "".join(outputs)
                # result = "".join(outputs).replace("<|EOT|>", "")

                # Each SSE event carries the cumulative response so far.
                yield 'data:' + ChatResponse(
                    choices=[MessageInResponseChat(message=Message(role='assistant', content=result))],
                    model="autodev-deepseek").model_dump_json()
                yield '\n\n'

            # Give the client a moment to drain before signalling completion;
            # use asyncio.sleep so the event loop is not blocked.
            await asyncio.sleep(0.2)
            yield 'data:[DONE]'
            print(result)

        except asyncio.TimeoutError:
            raise HTTPException(status_code=504, detail="Stream timed out")


app = FastAPI()


@app.exception_handler(RequestValidationError)
async def validation_exception_handler(request: Request, exc: RequestValidationError):
    # Collapse newlines and the multi-space indentation in pydantic's message.
    exc_str = f'{exc}'.replace('\n', ' ').replace('   ', ' ')
    print(f"{request}: {exc_str}")
    content = {'status_code': 10422, 'message': exc_str, 'data': None}
    return JSONResponse(content=content, status_code=status.HTTP_422_UNPROCESSABLE_ENTITY)


@app.post("/api/chat", response_class=Response)
async def root(body: SimpleOpenAIBody) -> StreamingResponse:
    return StreamingResponse(stream_generate(body.messages, temperature=body.temperature),
                             media_type="text/event-stream")


if __name__ == "__main__":
    try:
        # OpenBayes-specific: look up the public URL of the auxiliary port.
        meta = requests.get('http://localhost:21999/gear-status', timeout=5).json()
        url = meta['links'].get('auxiliary')
        if url:
            print("Open this link to access the service:", url)
    except Exception:
        pass

    uvicorn.run(app, host="0.0.0.0", port=8080)
```
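Once the server is up, the endpoint can be exercised with a small client. Here is a sketch using `requests`; the payload mirrors the `SimpleOpenAIBody` schema above, and since the server streams cumulative snapshots, the last printed chunk holds the full reply:

```python
import json
import requests

# Mirrors the SimpleOpenAIBody schema defined by the server.
body = {
    "messages": [{"role": "user", "content": "Write unit test for following code.\n\nint add(int a, int b) { return a + b; }"}],
    "temperature": 0.1,
    "stream": True,
}

with requests.post("http://localhost:8080/api/chat", json=body, stream=True) as resp:
    for line in resp.iter_lines(decode_unicode=True):
        if not line or line == "data:[DONE]":
            continue
        # Each event is "data:" followed by a ChatResponse snapshot.
        chunk = json.loads(line[len("data:"):])
        print(chunk["choices"][0]["message"]["content"])
```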
