def decode(contents: bytes) -> list:
    """Return the names of the standard encodings that can decode *contents*.

    Every codec in Python's standard-encodings table is tried in turn. The
    decoded text itself is discarded; only the codec names that decoded the
    whole input without error are reported. Because many single-byte code
    pages accept any byte sequence, the result is a list of candidates and
    not a single detected encoding.

    Codec list: https://docs.python.org/3/library/codecs.html#standard-encodings

    Args:
        contents: Raw bytes whose possible encodings should be probed.

    Returns:
        Codec names, in the order they are listed below, for which
        ``contents.decode(name)`` succeeded.
    """
    encodings = (
        "ascii", "big5", "big5hkscs", "cp037", "cp273", "cp424", "cp437",
        "cp500", "cp720", "cp737", "cp775", "cp850", "cp852", "cp855",
        "cp856", "cp857", "cp858", "cp860", "cp861", "cp862", "cp863",
        "cp864", "cp865", "cp866", "cp869", "cp874", "cp875", "cp932",
        "cp949", "cp950", "cp1006", "cp1026", "cp1125", "cp1140", "cp1250",
        "cp1251", "cp1252", "cp1253", "cp1254", "cp1255", "cp1256", "cp1257",
        "cp1258", "euc_jp", "euc_jis_2004", "euc_jisx0213", "euc_kr",
        "gb2312", "gbk", "gb18030", "hz", "iso2022_jp", "iso2022_jp_1",
        "iso2022_jp_2", "iso2022_jp_2004", "iso2022_jp_3", "iso2022_jp_ext",
        "iso2022_kr", "latin_1", "iso8859_2", "iso8859_3", "iso8859_4",
        "iso8859_5", "iso8859_6", "iso8859_7", "iso8859_8", "iso8859_9",
        "iso8859_10", "iso8859_11", "iso8859_13", "iso8859_14", "iso8859_15",
        "iso8859_16", "johab", "koi8_r", "koi8_t", "koi8_u", "kz1048",
        "mac_cyrillic", "mac_greek", "mac_iceland", "mac_latin2", "mac_roman",
        "mac_turkish", "ptcp154", "shift_jis", "shift_jis_2004",
        "shift_jisx0213", "utf_32", "utf_32_be", "utf_32_le", "utf_16",
        "utf_16_be", "utf_16_le", "utf_7", "utf_8", "utf_8_sig",
    )
    succeed = []
    for name in encodings:
        try:
            contents.decode(name)
        except (UnicodeError, LookupError):
            # UnicodeError: the bytes are not valid in this encoding.
            # LookupError: this interpreter does not ship the codec.
            # Anything else (e.g. KeyboardInterrupt) must propagate.
            continue
        else:
            succeed.append(name)
    return succeed
Python 中有一个很好的 chardet 库。我为您的任务起草了一段代码。
我用 cp1251 和 utf-8 编码的文件检查了输入,它似乎有效。
我用了一种简单的方式:从这里获取了 Python 的所有标准编码,然后这样做:
设计成小萝卜的形式。