Extract text from an lxml result * if xpath_results is a list, extract the text from each result and concatenate the list * if xpath_results is an xml element, extract all the text nodes from it ( text_content() method from lxml ) * if xpath_results is a string element, then it's already done
(xpath_results, allow_none=False)
| 134 | |
| 135 | |
def extract_text(xpath_results, allow_none=False):
    """Extract text from a lxml result

    * if xpath_results is a list, extract the text from each item with a
      recursive call, concatenate the pieces and strip the joined string
    * if xpath_results is an xml element, serialize it as text (without its
      tail) and collapse every run of whitespace into a single space
    * if xpath_results is a string, a number or a bool, return ``str()`` of it
    * if xpath_results is None, return None when ``allow_none`` is true

    ``allow_none`` only applies to the top-level value: list items are
    extracted with the default ``allow_none=False``, so a None inside a list
    raises ValueError.

    :param xpath_results: list, xml element, string, number, bool or None
    :param allow_none: return None instead of raising when xpath_results is None
    :raises ValueError: if xpath_results is None and ``allow_none`` is false,
        or if xpath_results has an unsupported type
    """
    if xpath_results is None:
        if allow_none:
            return None
        raise ValueError('extract_text(None, allow_none=False)')

    if isinstance(xpath_results, list):
        # Join once instead of growing a string inside the loop.
        return ''.join(extract_text(e) for e in xpath_results).strip()

    if isinstance(xpath_results, ElementBase):
        text = html.tostring(
            xpath_results, encoding='unicode', method='text', with_tail=False
        )
        # split() without arguments drops leading/trailing whitespace and
        # treats newlines as separators, so no strip()/replace() is needed.
        return ' '.join(text.split())

    if isinstance(xpath_results, (_ElementStringResult, _ElementUnicodeResult, str, Number, bool)):
        return str(xpath_results)

    raise ValueError('unsupported type')
| 165 | |
| 166 | |
| 167 | def normalize_url(url, base_url): |
no outgoing calls
no test coverage detected