Finer segmentation for search engines.
(self, sentence, HMM=True)
| 336 | yield x |
| 337 | |
def cut_for_search(self, sentence, HMM=True):
    """
    Finer segmentation for search engines.

    Segments ``sentence`` with ``self.cut`` (forwarding ``HMM``) and, for
    every resulting word, first yields the shorter slices of that word
    that have a truthy entry in ``self.FREQ``, then the word itself:

    - words longer than 2 characters contribute their 2-character slices;
    - words longer than 3 characters additionally contribute their
      3-character slices, after all of the 2-character ones.

    This is a generator; results are produced lazily in the order above.
    """
    for word in self.cut(sentence, HMM=HMM):
        length = len(word)
        # A window of ``size`` is only tried on strictly longer words, so
        # the full word is never emitted by the slicing pass itself.
        for size in (2, 3):
            if length <= size:
                continue
            for start in xrange(length - size + 1):
                piece = word[start:start + size]
                if self.FREQ.get(piece):
                    yield piece
        yield word
| 355 | |
def lcut(self, *args, **kwargs):
    """
    Call ``self.cut`` with the given arguments and return the results
    collected into a list.
    """
    segments = self.cut(*args, **kwargs)
    return list(segments)