dvs
Давай выложим свои варианты – может кому польза будет.
Вот такое получилось.
# -*- coding: utf-8 -*-
import urllib
import re
import os
import commands
import sys
import xlrd
import sqlite3
WORK_URL = 'http://www.stat.kz/digital/bizness_registr/Pages/stat_reg.aspx'
RAR_SAVE_PATH = '/home/pill/py/rars'
XLS_UNPACK_PATH = '/home/pill/py/xls'
DB_PATH = '/home/pill/py/sql3_db'
def get_archives(path):
"downloads archives from work_url to specified location"
if not os.path.exists(path):
if raw_input("%s path dosen't exist, do you want to create it? (y/n) "%path) == 'y':
os.mkdir(path)
else:
sys.exit(0)
rars_list = []
try:
sock = urllib.urlopen(WORK_URL)
data = sock.read()
sock.close()
except IOError:
print "Couldn't connect to %s" %work_url
return None
pass
rars_list = [''.join(('http://www.stat.kz',x)) for x in re.findall(r'href="(.+?rar)"',data)]
filenames = []
print 'retriaving: ',
for url in rars_list:
filename = os.path.join(path,re.search(r'(\d+\.rar)',url).group(0))
filenames.append(filename)
try:
urllib.urlretrieve(url, filename)
except IOError:
print "Couldn't reatrive or save file %s" %filename
return None
print '.',
print
return path
def unpack_archives(in_path, out_path):
"unpack archives to the same location"
if not os.path.exists(out_path):
if raw_input("%s path dosen't exist, do you want to create it? (y/n) "%out_path) == 'y':
os.mkdir(out_path)
else:
sys.exit(0)
print 'Extracting: .',
filenames = [os.path.join(in_path, name) for name in os.listdir(in_path) if name.endswith('rar')]
for filename in filenames:
cmd = ' '.join(('rar e',filename, out_path))
(status, output) = commands.getstatusoutput(cmd)
if status: ## Error case, print the command's output to stderr and exit
sys.stderr.write(output)
return None
print '.',
print
return [os.path.join(out_path, re.search(r'\d+\.rar',x).group()) for x in filenames]
def read_and_save(in_path, db_path):
"reads xls data from files and saves them to database"
filenames = [os.path.join(in_path, name) for name in os.listdir(in_path) if name.endswith('xls')]
db_conn = sqlite3.connect(db_path)
db_cursor = db_conn.cursor()
def process_xls(file_path):
'Process one xls to db'
rb = xlrd.open_workbook(file_path, formatting_info=True)
sheet = rb.sheet_by_index(0)
table_name = sheet.row_values(0)[0].replace('\n','')
t = [table_name,]
t.extend(sheet.row_values(2))
t = [''.join(("'",item.replace('\n',''),"'")) for item in t]
t = tuple(t)
print 'xls: ',file_path
sql = "create table %s (%s text, %s text, %s text, %s text, %s text, %s text, %s text, %s text, %s text)"%t
try:
db_cursor.execute(sql)
db_conn.commit()
print 'Table created'
except:
print 'Table already exists'
i = 0
print '%i entrys to insert'%sheet.nrows
for rown in xrange(3, sheet.nrows):
values = [table_name,]
values.extend(sheet.row_values(rown))
values = [''.join(("'",item.replace('\n','').replace("'",'"'),"'")) for item in values]
values = tuple(values)
sql = "insert into %s values (%s,%s,%s,%s,%s,%s,%s,%s,%s)"%values
db_cursor.execute(sql)
if i%100 == 0:
db_conn.commit()
if i%1000 == 0:
print '.',
i += 1
db_conn.commit()
print
print '%d entrys has been inserted succefuly'%i
print
for filename in filenames:
process_xls(filename)
db_conn.close()
def worker():
print 'Start'
get_archives(RAR_SAVE_PATH)
unpack_archives(RAR_SAVE_PATH, XLS_UNPACK_PATH)
read_and_save(XLS_UNPACK_PATH, DB_PATH)
print 'Done'
def main():
worker()
if __name__ == '__main__':
main()
Так и не разобрался почему вариант:
sql = “insert into ? values (?,?,?,?,?,?,?,?,?)”
db_cursor.execute(sql,values)
ругается на Syntax Error
пришлось использовать:
sql = “insert into %s values (%s,%s,%s,%s,%s,%s,%s,%s,%s)”%values
db_cursor.execute(sql)
Зы: я жадный до экспы, потому если кто-то поругает (Стиль, код , структура все в копилку), буду очень признателен.