(batch_data, provider = "groq/llama3-70b-8192", api_token = None)
| 878 | return blocks |
| 879 | |
def extract_blocks_batch(batch_data, provider="groq/llama3-70b-8192", api_token=None):
    """Extract content blocks from several pages with one batched LLM request.

    Args:
        batch_data: Iterable of ``(url, html)`` pairs. Each pair is rendered
            into ``PROMPT_EXTRACT_BLOCKS`` by replacing the ``{URL}`` and
            ``{HTML}`` placeholders.
        provider: Model identifier passed to ``litellm.batch_completion`` as
            ``model``.
        api_token: Optional API key. When supplied it is forwarded to litellm
            as ``api_key``; when omitted, credential lookup is left to litellm.

    Returns:
        A flat list containing the blocks of every page, in response order.
        A response that cannot be parsed into a JSON list contributes a single
        placeholder block tagged ``"error"`` instead of aborting the batch.
    """
    messages = []
    for url, html in batch_data:
        prompt = PROMPT_EXTRACT_BLOCKS
        # Same substitution order as before: URL first, then HTML.
        for name, value in (("URL", url), ("HTML", html)):
            prompt = prompt.replace("{" + name + "}", value)
        messages.append([{"role": "user", "content": prompt}])

    # Nothing to ask the model: skip the litellm import and the request.
    if not messages:
        return []

    # Function-scope import: litellm is only needed once a request is issued.
    from litellm import batch_completion

    # Previously the token was resolved but never handed to litellm, so an
    # explicitly supplied key was ignored. Only an explicit key is forwarded;
    # an environment fallback is not injected here because it could send a
    # key belonging to a different provider than the one selected.
    request_kwargs = {}
    if api_token:
        request_kwargs["api_key"] = api_token

    responses = batch_completion(
        model=provider,
        messages=messages,
        temperature=0.01,
        **request_kwargs,
    )

    all_blocks = []
    for response in responses:
        try:
            raw_blocks = extract_xml_data(
                ["blocks"], response.choices[0].message.content
            )["blocks"]
            blocks = json.loads(raw_blocks)
            # A non-list payload cannot be merged into the flat result; treat
            # it like any other malformed response.
            if not isinstance(blocks, list):
                raise ValueError("expected a JSON array of blocks")
        except Exception:
            # Deliberate best-effort: one bad response must not fail the batch.
            blocks = [{
                "index": 0,
                "tags": ["error"],
                "content": ["Error extracting blocks from the HTML content. Choose another provider/model or try again."],
                "questions": ["What went wrong during the block extraction process?"],
            }]
        # Extending in place keeps flattening linear, unlike sum(lists, []).
        all_blocks.extend(blocks)

    return all_blocks
| 922 | |
| 923 | def merge_chunks_based_on_token_threshold(chunks, token_threshold): |
| 924 | """ |
nothing calls this directly
no test coverage detected
searching dependent graphs…