i hotela izvinitsya za predydushyi vopros….
hotela prosto uznat,kak mozhno sdelat “indexing” v spiske?(ne znau,kak na russkom budet)a imenno:
mne dan spisok filmov v xml=> pri etom ya budu dolzhna podelit etu informaciu tak,chtoby informaciya o kazhdom filme byla v otdelnom file.
pozhaluista podskazhite kak byt dalshe……
vot,chto ya sdelala:
from lxml import etree
from collections import deque
import cPickle as pickle
tree = etree.parse( "movies_small.xml" )
root = tree.getroot()
movie_number = 0
def normalise(str):
return str.strip().lower()
def create_movie_dict(index, a_movie, movie_number):
mov_dict = {}
filename = "m"+str(movie_number)
for an_element in a_movie:
# key = an_element.tag
if an_element.tag == "title":
#building index for title
title_text = an_element.text
keywords = title_text.split()
for keyword in keywords:
print normalise(keyword)
if an_element.tag == "cast":
list_actors = []
for an_actor in an_element:
list_actors.append( an_actor.text )
mov_dict["cast"] = list_actors
elif an_element.tag == "reviews":
list_reviews = []
for a_review in an_element:
source = a_review.get("source")
score = a_review.get("source")
score = a_review.text
list_reviews.append( (source, score))
mov_dict["reviews"] = list_reviews
# value = list_reviews
#print an_element.tag
else:
mov_dict[an_element.tag] = an_element.text
# value = an_element.text
# a_movie_dict[key] = value
# return mov_dict
#print mov_dict
file = open('data/'+ filename, 'w')
pickle.dump(mov_dict, file)
file.close()
index = {}
for a_movie in root:
create_movie_dict(index, a_movie, movie_number)
movie_number += 1
# pickle index <= NE MOGI RAZOBRATSA KAK PRAVILNO VSE SDELAT
file = open('data/'+ filename, 'w')
pickle.dump(mov_dict, file)
file.close()