# Point this at the LLM you want to use
# Models can be found at https://huggingface.co/, e.g.
# https://huggingface.co/lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF/blob/main/Meta-Llama-3.1-8B-Instruct-Q8_0.gguf
#MODEL=/srv/ai/Meta-Llama-3.1-8B-Instruct-Q8_0.gguf
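# One way to fetch a GGUF file, as an illustration only (note that downloads
# use a /resolve/ URL rather than the /blob/ URL shown above; the local path
# simply matches the commented-out MODEL example):
#   curl -L -o /srv/ai/Meta-Llama-3.1-8B-Instruct-Q8_0.gguf \
#     https://huggingface.co/lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct-Q8_0.gguf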
# Change this to a secret value known only to legitimate clients
#API_KEY=OpenMandriva
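# For example, a random key can be generated with (any sufficiently
# random string works just as well):
#   openssl rand -hex 32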
# Host and port to listen on (use 0.0.0.0 to listen on all IPv4 addresses)
HOST=127.0.0.1
PORT=8080
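# Once the service is running, a quick sanity check against the
# OpenAI-compatible endpoint (assuming the HOST/PORT above and that
# API_KEY has been set and exported):
#   curl -H "Authorization: Bearer $API_KEY" http://127.0.0.1:8080/v1/models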
# Extra options passed to llama-server, e.g. GPU offload control
LLAMA_OPTIONS="--n-gpu-layers -1"
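# Other flags are commonly combined here; the values below are illustrative
# only - check llama-server --help for what your build supports:
#   LLAMA_OPTIONS="--n-gpu-layers 99 --ctx-size 8192 --threads 8"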
