( # this is useful for chatting with trained models in the command line. It perhaps ought to stream. Perhaps we ought to
prompt_path,
template_path,
gguf_model_path,
context_length,
modelid, # used for the tokenizer
llama_path="./llama.cpp", # customizable llama.cpp path
task_id=None,
**kwargs,
)
| 19 | |
| 20 | |
| 21 | def chat( # this is useful for chatting with trained models in the command line. It perhaps ought to stream. Perhaps we ought to |
| 22 | prompt_path, |
| 23 | template_path, |
| 24 | gguf_model_path, |
| 25 | context_length, |
| 26 | modelid, # used for the tokenizer |
| 27 | llama_path="./llama.cpp", # customizable llama.cpp path |
| 28 | task_id=None, |
| 29 | **kwargs, |
| 30 | ): |
| 31 | |
| 32 | with open(prompt_path, "r") as f: |
| 33 | prompt = f.read() |
| 34 | |
| 35 | with open(template_path, "r") as f: |
| 36 | template = f.read() |
| 37 | |
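| 38 | # Ensure a llama.cpp checkout exists: if llama_path is missing, clone the repo and pin it to a known commit. NOTE(review): the clone and checkout below always target ./llama.cpp in the current directory, not llama_path - confirm this is intended for custom paths. |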
| 39 | if not os.path.exists(llama_path): |
| 40 | print("llama.cpp directory not found. Cloning repository...") |
| 41 | subprocess.run( |
| 42 | ["git", "clone", "https://github.com/ggml-org/llama.cpp.git"], check=True |
| 43 | ) |
| 44 | subprocess.run( |
| 45 | ["git", "checkout", "b775345d788ac16260e7eef49e11fe57ee5677f7"], |
| 46 | cwd="llama.cpp", |
| 47 | check=True |
| 48 | ) |
| 49 | |
| 50 | # Check if llama-server exists |
| 51 | llama_server_path = os.path.join(llama_path, "build", "bin", "llama-server") |
| 52 | if platform.system() == "Windows": |
| 53 | llama_server_path += ".exe" |
| 54 | |
| 55 | if not os.path.exists(llama_server_path): |
| 56 | print("llama-server not found. Building llama.cpp...") |
| 57 | |
| 58 | # Detect if NVIDIA GPU is available |
| 59 | has_nvidia_gpu = False |
| 60 | try: |
| 61 | result = subprocess.run(["nvidia-smi"], capture_output=False, text=True) |
| 62 | has_nvidia_gpu = result.returncode == 0 |
| 63 | except FileNotFoundError: |
| 64 | has_nvidia_gpu = False |
| 65 | |
| 66 | # Build with appropriate flags |
| 67 | build_cmd = ["cmake", "-B", "build"] |
| 68 | if has_nvidia_gpu: |
| 69 | build_cmd.append("-DGGML_CUDA=ON") |
| 70 | print("NVIDIA GPU detected. Building with CUDA support...") |
| 71 | else: |
| 72 | print("No NVIDIA GPU detected. Building CPU-only version...") |
| 73 | |
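| 74 | # Run cmake configure. NOTE(review): the call below passes a list together with shell=True; on POSIX only the first item ("cmake") is run as the command and the -B/-D flags go to the shell instead - confirm and consider dropping shell=True. |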
| 75 | subprocess.run(build_cmd, cwd=llama_path, check=True, shell=True) |
| 76 | |
| 77 | # Build the project |
| 78 | subprocess.run( |
nothing calls this directly
no test coverage detected