Изучаю python, и в качестве оболочки использую ipython.
Интересует вопрос: как настроить в python sys.getdefaultencoding()?
В скриптах использую это:
-*- coding: utf8 -*-
:wq
-*- coding: utf8 -*-
#!/usr/bin/env python
# -*-coding: utf8 -*-
import sys,os
sys.setdefaultencoding('utf8')
AttributeError: 'module' object has no attribute 'setdefaultencoding'
Python 2.7.1+
Python 3.2
LANG=ru_RU.UTF-8
LANGUAGE=ru_RU:en
LC_CTYPE="ru_RU.UTF-8"
LC_NUMERIC="ru_RU.UTF-8"
LC_TIME="ru_RU.UTF-8"
LC_COLLATE="ru_RU.UTF-8"
LC_MONETARY="ru_RU.UTF-8"
LC_MESSAGES="ru_RU.UTF-8"
LC_PAPER="ru_RU.UTF-8"
LC_NAME="ru_RU.UTF-8"
LC_ADDRESS="ru_RU.UTF-8"
LC_TELEPHONE="ru_RU.UTF-8"
LC_MEASUREMENT="ru_RU.UTF-8"
LC_IDENTIFICATION="ru_RU.UTF-8"
LC_ALL=
C:\Documents and Settings\username>python -S
Python 2.6 (r26:66721, Oct 2 2008, 11:35:03) [MSC v.1500 32 bit (Intel)] on win
32
>>> import sys
>>> sys.setdefaultencoding()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: setdefaultencoding() takes exactly 1 argument (0 given)
>>> sys.setdefaultencoding(u"UTF-8")
>>>
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import os,tarfile,time,gzip,stat,sys
# Module-level list of the file paths selected for archiving.
files_list = []
def pack(_dest, _source):
    """Pack the given files into a dated, bzip2-compressed tar archive.

    The archive path is ``_dest`` immediately followed by today's local date
    (``DD.MM.YYYY``) and the ``.tar.bz2`` suffix.  ``_dest`` is used as a
    plain string prefix, so it must end with a path separator if the archive
    is meant to be created inside a directory.

    :param _dest: path prefix of the archive to create.
    :param _source: iterable of file paths to add to the archive.
    """
    pack_name = _dest + time.strftime("%d.%m.%Y", time.localtime()) + ".tar.bz2"
    _pack = tarfile.open(pack_name, 'w|bz2')
    try:
        for name in _source:
            _pack.add(name)
    finally:
        # Close the archive even if add() fails, so the handle is not leaked.
        _pack.close()
def get_info(file_name):
    """Return the path together with its modification and access dates.

    :param file_name: path of the file to inspect.
    :returns: tuple ``(file_name, mod_time, access_time)`` where both times
        are local dates formatted as ``DD-MM-YYYY``.
    :raises OSError: if ``os.stat`` fails for ``file_name``.
    """
    time_format = "%d-%m-%Y"

    def _as_date(timestamp):
        # Render a POSIX timestamp as a local calendar date.
        return time.strftime(time_format, time.localtime(timestamp))

    file_stat = os.stat(file_name)
    mod_time = _as_date(file_stat[stat.ST_MTIME])
    access_time = _as_date(file_stat[stat.ST_ATIME])
    return file_name, mod_time, access_time
def check_extention(file_name):
    """Tell whether ``file_name`` is an MS Office document or spreadsheet.

    The extension is taken from the last path component with
    ``os.path.splitext``, so dots in directory names or extra dots in the
    file name (``report.v2.xls``) do not confuse the check.  The comparison
    is case-sensitive.  The file itself is not accessed.

    :param file_name: path of the file to classify.
    :returns: ``True`` for ``.doc``, ``.docx``, ``.xls`` and ``.xlsx`` files,
        ``False`` otherwise (including names without any extension).
    """
    wanted = ('.doc', '.docx', '.xls', '.xlsx')
    _, extension = os.path.splitext(file_name)
    return extension in wanted
def get_dirs(_path="/backup/users/"):
    """List the entries of ``_path``, each with a trailing ``/`` appended.

    Every name returned by ``os.listdir`` is included; entries are not
    checked for actually being directories.

    :param _path: directory to list.
    :returns: list of entry names with ``/`` appended; an empty list if the
        directory cannot be listed (the error is printed).
    """
    dirs_list = []
    try:
        # listdir() is the call that can raise OSError, so it must be inside
        # the try block for the handler below to have any effect.
        for cur_dir in os.listdir(_path):
            dirs_list.append(cur_dir + "/")
    except OSError as err:
        print(err)
    return dirs_list
def prnt(_list):
    """Print every item of ``_list`` on its own line.

    :param _list: iterable of items to print.
    """
    for name in _list:
        # Single-argument call form prints the same on Python 2 and 3.
        print(name)
def main():
    """Collect the user's office documents and pack them into an archive.

    Walks the backup tree, appends every file accepted by
    ``check_extention`` to the module-level ``files_list`` and then passes
    that list to ``pack``.  An ``OSError`` raised while scanning is printed;
    whatever has been collected up to that point is still archived.
    """
    try:
        for root, dirs, files in os.walk("/backup/users/Науменко Аня"):
            for name in files:
                # Build the full path once and reuse it for the check and
                # for the list entry.
                full_path = os.path.join(root, name)
                if check_extention(full_path):
                    files_list.append(full_path)
    except OSError as err:
        print(err)
    pack("/backup/arch/", files_list)


# Run the backup only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
#prnt(files_list)
In [15]: sys.getdefaultencoding()
Out[15]: 'ascii'
x_faf-pythoner_x: во втором питоне нужно ставить u'абвг', потом применять .encode('нужная_кодировка'): for root,dirs,files in os.walk("/backup/users/Науменко Аня"):
>>> s = u'абвг'
>>> print s
абвг
>>> s.encode('utf-8')
'\xd0\xb0\xd0\xb1\xd0\xb2\xd0\xb3'
>>>
x_faf-pythoner_x: относится к самому исходному файлу, чтобы правильно декодировать то, что в нём написано: # -*- coding: utf-8 -*-
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import os,tarfile,time,gzip,stat,sys
# Module-level accumulator: main() appends the matching file paths here
# and then passes the list to pack().
files_list = []
def pack(_dest, _source):
    """Pack the given files into a dated, bzip2-compressed PAX tar archive.

    The archive path is ``_dest`` immediately followed by today's local date
    (``DD.MM.YYYY``) and the ``.tar.bz2`` suffix.  ``_dest`` is used as a
    plain string prefix, so it must end with a path separator if the archive
    is meant to be created inside a directory.

    :param _dest: path prefix of the archive to create.
    :param _source: iterable of file paths to add to the archive.
    """
    # Plain concatenation instead of str.format(): on Python 2 a byte-string
    # template formatted with a non-ASCII unicode ``_dest`` raises
    # UnicodeEncodeError.
    pack_name = _dest + time.strftime("%d.%m.%Y", time.localtime()) + ".tar.bz2"
    packed = tarfile.open(pack_name, 'w|bz2', format=tarfile.PAX_FORMAT)
    try:
        for name in _source:
            packed.add(name)
    finally:
        # Close the archive even if add() fails, so the handle is not leaked.
        packed.close()
def get_info(file_name):
    """Return the path together with its modification and access dates.

    :param file_name: path of the file to inspect.
    :returns: tuple ``(file_name, mod_time, access_time)`` where both times
        are local dates formatted as ``DD-MM-YYYY``.
    :raises OSError: if ``os.stat`` fails for ``file_name``.
    """
    time_format = "%d-%m-%Y"

    def _as_date(timestamp):
        # Render a POSIX timestamp as a local calendar date.
        return time.strftime(time_format, time.localtime(timestamp))

    file_stat = os.stat(file_name)
    mod_time = _as_date(file_stat[stat.ST_MTIME])
    access_time = _as_date(file_stat[stat.ST_ATIME])
    return file_name, mod_time, access_time
def check_extention(file_name):
    """Tell whether ``file_name`` is an MS Office document or spreadsheet.

    The extension is obtained with ``os.path.splitext`` and compared
    case-sensitively.  The file itself is not accessed.

    :param file_name: path of the file to classify.
    :returns: ``True`` for ``.doc``, ``.docx``, ``.xls`` and ``.xlsx`` files,
        ``False`` otherwise.
    """
    need_extensions = ('.doc', '.docx', '.xls', '.xlsx')
    # Only the extension part is needed; the stem is discarded.
    _, file_extension = os.path.splitext(file_name)
    return file_extension in need_extensions
def get_dirs(_path="/backup/users/"):
    """Return the names of the sub-directories located directly in ``_path``.

    :param _path: directory to scan.
    :returns: list of sub-directory names (without ``_path`` prepended); an
        empty list if the directory cannot be listed (the error is printed).
    """
    dirs_list = []
    try:
        for entry in os.listdir(_path):
            # listdir() yields bare names, so each one has to be joined with
            # _path; testing the bare name would check it against the
            # current working directory instead.
            if os.path.isdir(os.path.join(_path, entry)):
                dirs_list.append(entry)
    except OSError as err:
        print(err)
    return dirs_list
def prnt(_list):
    """Print every item of ``_list`` on its own line.

    :param _list: iterable of items to print.
    """
    for name in _list:
        # Single-argument call form prints the same on Python 2 and 3.
        print(name)
def main():
    """Collect the user's office documents and pack them into an archive.

    Walks the backup tree, appends every file accepted by
    ``check_extention`` to the module-level ``files_list`` and then passes
    that list to ``pack``.  An ``OSError`` raised while scanning is printed;
    whatever has been collected up to that point is still archived.
    """
    try:
        # Backslashes are doubled so that none of them can be read as the
        # start of an escape sequence; the path value itself is unchanged.
        for root, dirs, files in os.walk(u"D:\\TEST\\BACKUP\\Науменко Аня"):
            for name in files:
                # Build the full path once and reuse it for the check and
                # for the list entry.
                full_path = os.path.join(root, name)
                if check_extention(full_path):
                    files_list.append(full_path)
    except OSError as err:
        print(err)
    # NOTE(review): pack() uses its first argument as a plain prefix, so this
    # creates "D:\TEST\BACKUP<date>.tar.bz2" next to the BACKUP folder, not a
    # file inside it -- confirm whether a trailing separator was intended.
    pack(u"D:\\TEST\\BACKUP", files_list)


# Run the backup only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
#prnt(files_list)
tarfile.DEFAULT_FORMAT. Согласно этому абзацу из той же документации (пункт 12.5.5. Unicode issues):
The default format for creating archives. This is currently GNU_FORMAT.
The default value for encoding is the local character encoding. It is deduced from sys.getfilesystemencoding() and sys.getdefaultencoding(). In read mode, encoding is used exclusively to convert unicode names from a pax archive to strings in the local character encoding. In write mode, the use of encoding depends on the chosen archive format. In case of PAX_FORMAT, input names that contain non-ASCII characters need to be decoded before being stored as UTF-8 strings. The other formats do not make use of encoding unless unicode objects are used as input names. These are converted to 8-bit character strings before they are added to the archive. Я пришёл к выводу, что формат нужно изменить на PAX.