(llm_name: str, logger=BaseLogger(), config={})
| 61 | |
| 62 | |
def load_llm(llm_name: str, logger=BaseLogger(), config=None):
    """Build the streaming chat model selected by ``llm_name``.

    Selection rules, checked in order:

    * ``"gpt-4"``, ``"gpt-4o"``, ``"gpt-4-turbo"`` -> ``ChatOpenAI`` using
      ``llm_name`` as the model name.
    * ``"gpt-3.5"`` -> ``ChatOpenAI`` with ``"gpt-3.5-turbo"``.
    * ``"claudev2"`` -> ``ChatBedrock`` with ``"anthropic.claude-v2"``.
    * any name for which ``llm_name.startswith(AWS_MODELS)`` is true ->
      ``ChatBedrock`` using ``llm_name`` as the model id.
    * any other non-empty name -> ``ChatOllama`` for that model.
    * an empty name -> ``ChatOpenAI`` with ``"gpt-3.5-turbo"``.

    Args:
        llm_name: Identifier of the model to load.
        logger: Object exposing ``info(message)``; receives one message
            describing the selected backend.
        config: Optional mapping of settings. Only ``"ollama_base_url"`` is
            read, and only when the Ollama branch is taken. ``None`` is
            treated as an empty mapping.

    Returns:
        The configured chat model instance for the selected backend.

    Raises:
        KeyError: If the Ollama branch is taken and ``config`` has no
            ``"ollama_base_url"`` entry.
    """
    # A ``None`` sentinel avoids sharing one mutable dict across every call.
    if config is None:
        config = {}

    if llm_name in ("gpt-4", "gpt-4o", "gpt-4-turbo"):
        logger.info("LLM: Using GPT-4")
        return ChatOpenAI(temperature=0, model_name=llm_name, streaming=True)

    if llm_name == "gpt-3.5":
        logger.info("LLM: Using GPT-3.5")
        return ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo", streaming=True)

    if llm_name == "claudev2":
        logger.info("LLM: ClaudeV2")
        return ChatBedrock(
            model_id="anthropic.claude-v2",
            model_kwargs={"temperature": 0.0, "max_tokens_to_sample": 1024},
            streaming=True,
        )

    # NOTE(review): AWS_MODELS is defined outside this block; it must be a str
    # or a tuple of str prefixes for ``str.startswith`` to accept it.
    if llm_name.startswith(AWS_MODELS):
        logger.info(f"LLM: {llm_name}")
        return ChatBedrock(
            model_id=llm_name,
            model_kwargs={"temperature": 0.0, "max_tokens_to_sample": 1024},
            streaming=True,
        )

    # Any other non-empty name is handed to Ollama as the model name.
    if llm_name:
        logger.info(f"LLM: Using Ollama: {llm_name}")
        return ChatOllama(
            temperature=0,
            base_url=config["ollama_base_url"],
            model=llm_name,
            streaming=True,
            # seed=2,
            top_k=10,  # A higher value (100) will give more diverse answers, while a lower value (10) will be more conservative.
            top_p=0.3,  # Higher value (0.95) will lead to more diverse text, while a lower value (0.5) will generate more focused text.
            num_ctx=3072,  # Sets the size of the context window used to generate the next token.
        )

    # Empty name: fall back to GPT-3.5.
    logger.info("LLM: Using GPT-3.5")
    return ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo", streaming=True)
| 99 | |
| 100 | |
| 101 | def configure_llm_only_chain(llm): |
no test coverage detected