improve sohu extractor

2024-09-14 18:36:41 +02:00 · 2013-08-06 10:26:57 +08:00 · 2013-08-06 10:26:57 +08:00 · d5b00ee6e0
commit d5b00ee6e0
parent b5a6d40818
1 changed file with 2 additions and 0 deletions
--- a/youtube_dl/extractor/sohu.py
+++ b/youtube_dl/extractor/sohu.py
@@ -31,6 +31,7 @@ def _real_extract(self, url):
        compiled = re.compile(pattern, re.DOTALL)
        title = self._search_regex(compiled, webpage, u'video title')
        title = clean_html(title).split('-')[0].strip()
        self.to_screen('Title: %s' % title)
        pattern = re.compile(r'var vid="(\d+)"')
        result = re.search(pattern, webpage)
        if not result:
@@ -70,6 +71,7 @@ def _real_extract(self, url):
        base_url_3 = 'http://allot/?prot=prot&file=clipsURL[i]&new=su[i]'
        files_info = []
        for i in range(num_of_parts):
            self.to_screen('Geting json infomation of part %s/%s' % (i + 1, num_of_parts))
            middle_url = 'http://%s/?prot=%s&file=%s&new=%s' % (allot, prot, clipsURL[i], su[i])
            logging.info('middle url part %d: %s' % (i, middle_url))
            middle_info = urllib2.urlopen(middle_url).read().split('|')