Fetch the article title and analyze it
(self, doc)
| 222 | return None |
| 223 | |
def get_title(self, doc):
    """Fetch the article title and analyze it

    Reads the text of the first ``<title>`` element found in ``doc``,
    splits it on the first delimiter it contains and returns the result
    after passing it through ``MOTLEY_REPLACEMENT.replaceAll``.

    Delimiters are tried in this order: ``|``, ``-``, ``_``, ``/``,
    ``»``, ``:``. Only the first one present in the title is used; the
    actual choice of which part to keep is delegated to
    ``self.split_title``.

    Returns an empty string when ``doc`` has no ``<title>`` element.
    """
    title_element = self.parser.getElementsByTag(doc, tag='title')
    # no title found
    if title_element is None or len(title_element) == 0:
        return ''

    # title elem found
    title_text = self.parser.getText(title_element[0])

    # Exactly one delimiter may be applied. An if/elif chain enforces
    # that for every branch, including the underscore branch, which
    # previously fell through and let '/', '»' or ':' split the
    # already-split title a second time.
    if '|' in title_text:
        title_text = self.split_title(title_text, PIPE_SPLITTER)
    elif '-' in title_text:
        title_text = self.split_title(title_text, DASH_SPLITTER)
    elif '_' in title_text:
        title_text = self.split_title(title_text, UNDERSCORE_SPLITTER)
    elif '/' in title_text:
        title_text = self.split_title(title_text, SLASH_SPLITTER)
    elif '»' in title_text:
        title_text = self.split_title(title_text, ARROWS_SPLITTER)
    elif ':' in title_text:
        title_text = self.split_title(title_text, COLON_SPLITTER)

    # NOTE(review): MOTLEY_REPLACEMENT is defined outside this block;
    # presumably it strips known noise strings -- confirm at definition.
    return MOTLEY_REPLACEMENT.replaceAll(title_text)
| 268 | |
| 269 | def split_title(self, title, splitter): |
| 270 | """Split the title to best part possible |
no test coverage detected