Executes the node's logic: determines the input type from the first input key and dispatches the source to the matching handler (directory, file, local source, or web URL), returning that handler's result.
(self, state)
| 91 | ) |
| 92 | |
def execute(self, state):
    """
    Route the node's input to the handler that matches its type.

    The input keys are obtained from ``self.get_input_keys(state)`` and each
    one is looked up in ``state``. Only the first key and its value are used:
    the key names the input type and the value is the source to load.

    Args:
        state: Mapping indexed by the input keys; it is passed through
            unchanged to the selected handler.

    Returns:
        Whatever the selected handler returns.

    Raises:
        ValueError: If the first input key is not a recognised input type.
    """
    self.logger.info(f"--- Executing {self.node_name} Node ---")

    keys = self.get_input_keys(state)
    # Every key is resolved up front, so a key missing from the state fails
    # here even though only the first entry is consumed below.
    values = [state[key] for key in keys]

    source = values[0]
    input_type = keys[0]

    # Both handlers are bound before dispatching, whichever branch is taken.
    on_directory = self.handle_directory
    on_file = self.handle_file

    directory_types = {"json_dir", "xml_dir", "csv_dir", "pdf_dir", "md_dir"}
    file_types = {"pdf", "csv", "json", "xml", "md"}

    if input_type in directory_types:
        return on_directory(state, input_type, source)
    if input_type in file_types:
        return on_file(state, input_type, source)
    if input_type == "local_dir":
        return self.handle_local_source(state, source)
    if input_type == "url":
        return self.handle_web_source(state, source)

    raise ValueError(f"Invalid input type: {input_type}")
| 127 | |
| 128 | def handle_directory(self, state, input_type, source): |
| 129 | """ |