@@ -235,9 +235,9 @@ You are strongly recommended to use a sandbox such as [docker](https://docs.dock
235235
236236```bash
237237# Mount the current directory to the container
238- # If you want to change the RAM address space limit (in MB, 128 GB by default): `--max-as-limit XXX`
239- # If you want to change the RAM data segment limit (in MB, 4 GB by default): `--max-data-limit`
240- # If you want to change the RAM stack limit (in MB, 4 MB by default): `--max-stack-limit`
238+ # If you want to change the RAM address space limit (in MB, 30 GB by default): `--max-as-limit XXX`
239+ # If you want to change the RAM data segment limit (in MB, 30 GB by default): `--max-data-limit XXX`
240+ # If you want to change the RAM stack limit (in MB, 10 MB by default): `--max-stack-limit XXX`
241241docker run -v $(pwd):/app bigcodebench/bigcodebench-evaluate:latest --split [complete|instruct] --subset [full|hard] --samples samples-sanitized-calibrated.jsonl
242242
243243# If you only want to check the ground truths
@@ -259,6 +259,8 @@ Then, run the evaluation:
259259bigcodebench.evaluate --split [complete|instruct] --subset [full|hard] --samples samples-sanitized-calibrated.jsonl
260260# ...If you really don't want to check the ground truths
261261bigcodebench.evaluate --split [complete|instruct] --subset [full|hard] --samples samples-sanitized-calibrated.jsonl --no-gt
262+ # If you want to save the pass rate to a file
263+ bigcodebench.evaluate --split [complete|instruct] --subset [full|hard] --samples samples-sanitized-calibrated.jsonl --save_pass_rate
262264
263265# You are strongly recommended to use the following command to clean up the environment after evaluation:
264266pids=$(ps -u $(id -u) -o pid,comm | grep 'bigcodebench' | awk '{print $1}'); if [ -n "$pids" ]; then echo $pids | xargs -r kill; fi;
0 commit comments