Найти - Пользователи
Полная версия: Парсер ютуба выдал только одну ссылку
Начало » Python для новичков » Парсер ютуба выдал только одну ссылку
1
satfan
Это мой скрипт:
 # coding: utf8
# == pars_playlist.py
# == select short information from a YouTube playlist ==
from urllib import urlopen
from BeautifulSoup import BeautifulSoup
print   "pars_playlist working..."
# == get first linkof playlist ==
url = raw_input('input link:')
url = "https://www.youtube.com/playlist?list=PLku9se_HAVOrs3p_jJzZAbV3BEODrhS3j"
fh = open('data.txt', 'w')
cur_num = 1		# index of reading page
try: 
		while True:
			# -- get curent page --
			print cur_num
			print url
			content = urlopen( url )
			page = content.read()	
				
			# -- cut head
			soup = BeautifulSoup(page)
			body = soup.find('body')
			# get current title
			span = body.find(id="ecw-title")
			title = span.getText()
			print title
			# get list of all links
			ol = body.find(id="playlist-autoscroll-list")
			lis = ol.findAll('li')
			# soup for current page
			for idx, li in enumerate(lis): # seek for picture
				li_title = li["data-video-title"] 
				if li_title  == title: break
			video_id = li["data-video-id"]
			# get picture information
			img = li.find('img')
			img_src = img['src']
			pos = img_src.find('?')
			if pos > 0: img_src = img_src[:pos]
			# get description of page
			descr = body.find(id="eow-description")
			descr_text = descr.getText()
			# save information
			fh.write(title.encode('utf8') + '\n')
			fh.write(video_id.encode('utf8') + '\n')
			fh.write(img_src.encode('utf8') + '\n')
			fh.write(descr_text.encode('utf8') + '\n')
			fh.write('\n')
			if idx >=len(lis) -1: break
			# get next url
			idx += 1
			li = lis[ idx ]
			anc = li.find('a')
			href = anc['href']
			url = "https://www.youtube.com" + href
			cur_num = cur_num + 1
		# == 05 end of work ==
except: 
	print "exception: " + str(cur_num)
	pass
finally:
	fh.close()
	print "ok"
=====================================
Парсер ничего не записал в файл: data.txt
FishHook
satfan
try, except и finally - это части составного оператора, они должны находиться на одном уровне отступов
This is a "lo-fi" version of our main content. To view the full version with more information, formatting and images, please click here.
Powered by DjangoBB