# The container must be run with the microsoft/vcredist image layered in.
# Run a prompt through the Llama CLI:
turbo run -a ggerganov/llama.cpp,microsoft/vcredist,turbobuild/llama-3.1-8b-instruct-q4_k_m-imat-gguf --startup-file="C:\llama-avx2\llama-cli.exe" -- -m "C:\Models\Llama-3.1-8B-Instruct\llama-3.1-8b-instruct-q4_k_m-imat.gguf" -n 50 -p "The meaning of life and the universe is"
# Start the Llama Server with a model layer.
turbo run ggerganov/llama.cpp,microsoft/vcredist,turbobuild/llama-3.1-8b-instruct-q4_k_m-imat-gguf -- /C "C:\llama-avx2\llama-server.exe" -m "C:\Models\Llama-3.1-8B-Instruct\llama-3.1-8b-instruct-q4_k_m-imat.gguf" -n 50 --port 8180 --chat-template llama3
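# Once the server is up, you can query it over HTTP from another shell.
# A minimal sketch using curl (bundled with Windows 10 and later), targeting
# the OpenAI-compatible /v1/chat/completions endpoint that llama-server
# exposes; port 8180 matches the command above and the message text is
# illustrative:
curl http://localhost:8180/v1/chat/completions -H "Content-Type: application/json" -d "{\"messages\": [{\"role\": \"user\", \"content\": \"Hello, who are you?\"}]}"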
# Start the Llama Server with a downloaded model.
# Download llama-2-7b-chat.Q4_K_M.gguf to c:\llama-2-7b-chat.Q4_K_M.gguf from
# https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
turbo run ggerganov/llama.cpp,microsoft/vcredist -- /C "C:\llama-avx2\llama-server.exe" -m c:\llama-2-7b-chat.Q4_K_M.gguf -n 50 --port 8180 --chat-template llama2
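# As above, the running server can be tested over HTTP. A minimal sketch
# using llama-server's raw /completion endpoint; port 8180 matches the
# command above and the prompt text is illustrative:
curl http://localhost:8180/completion -H "Content-Type: application/json" -d "{\"prompt\": \"Building a website can be done in 10 simple steps:\", \"n_predict\": 50}"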