Форум сайта python.su
-2
Это мой скрипт:
# coding: utf8 # == pars_playlist.py # == select short information from Yotube playlist -- from urllib import urlopen from BeautifulSoup import BeautifulSoup print "pars_playlist working..." # == get first linkof playlist == url = raw_input('input link:') url = "https://www.youtube.com/playlist?list=PLku9se_HAVOrs3p_jJzZAbV3BEODrhS3j" fh = open('data.txt', 'w') cur_num = 1 # index of reading page try: while True: # -- get curent page -- print cur_num print url content = urlopen( url ) page = content.read() # -- cut head soup = BeautifulSoup(page) body = soup.find('body') # get current title span = body.find(id="ecw-title") title = span.getText() print title # get list of all links ol = body.find(id="playlist-autoscroll-list") lis = ol.findAll('li') # soup for current page for idx, li in enumerate(lis): # seek for picture li_title = li["data-video-title"] if li_title == title: break video_id = li["data-video-id"] # get picture information img = li.find('img') img_src = img['src'] pos = img_src.find('?') if pos > 0: img_src = img_src[:pos] # get description of page descr = body.find(id="eow-description") descr_text = descr.getText() # save information fh.write(title.encode('utf8') + '\n') fh.write(video_id.encode('utf8') + '\n') fh.write(img_src.encode('utf8') + '\n') fh.write(descr_text.encode('utf8') + '\n') fh.write('\n') if idx >=len(lis) -1: break # get next url idx += 1 li = lis[ idx ] anc = li.find('a') href = anc['href'] url = "https://www.youtube.com" + href cur_num = cur_num + 1 # == 05 end of work == except: print "exception: " + str(cur_num) pass finally: fh.close() print "ok" ===================================== Парсер ничего не записал в файл: data.txt
Отредактировано satfan (Март 26, 2019 12:20:26)
Офлайн
568
satfan
try, except и finally - это части составного оператора, они должны находиться на одном уровне отступов
Офлайн