git clone https://github.com/ggerganov/llama.cpp
cd llama.cpp
make

# M2 Max - 16 GB RAM: Q8_0 quant with the full 16k context, all layers offloaded to Metal
wget -P ./models https://huggingface.co/TheBloke/OpenHermes-2.5-Mistral-7B-16k-GGUF/resolve/main/openhermes-2.5-mistral-7b-16k.Q8_0.gguf
./server -m models/openhermes-2.5-mistral-7b-16k.Q8_0.gguf -c 16000 -ngl 32

# M1 - 8 GB RAM: smaller Q4_K_M quant and a 2k context to fit in memory
wget -P ./models https://huggingface.co/TheBloke/OpenHermes-2.5-Mistral-7B-16k-GGUF/resolve/main/openhermes-2.5-mistral-7b-16k.Q4_K_M.gguf
./server -m models/openhermes-2.5-mistral-7b-16k.Q4_K_M.gguf -c 2000 -ngl 32
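
# Sanity check: by default the llama.cpp server listens on http://localhost:8080.
# A minimal completion request against its /completion endpoint; the prompt and
# n_predict values below are arbitrary example values.
curl --request POST \
  --url http://localhost:8080/completion \
  --header "Content-Type: application/json" \
  --data '{"prompt": "Building a website can be done in 10 simple steps:", "n_predict": 128}'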