UID10879
好友
帖子
阅读权限10
收听
最后登录1970-1-1
|

楼主 |
发表于 2021-7-28 07:33:17高大上手机用户
|
显示全部楼层
代码就是这段,以前可以解析的,最近不能用了
import email
import mimetypes
# Convert an MHT (MIME HTML) archive into a standalone HTML file.
def parse(file_name: str) -> None:
    """Extract the HTML part of an MHT archive and save it beside the source.

    The archive is parsed as a MIME message. Every non-multipart part that
    has no attachment filename and whose content type is ``text/html`` is
    decoded (per its Content-Transfer-Encoding) and written, as raw bytes,
    to ``<file_name without trailing '.mht'>.html``. If the archive holds
    several such parts they are written to the same path, so the last one
    wins.

    Args:
        file_name: Path of the ``.mht`` file to convert.

    Raises:
        OSError: If ``file_name`` cannot be opened for reading. Failures
            while writing the output file are printed, not raised.
    """
    print(file_name)

    # Parse the raw bytes instead of decoding the whole archive with one
    # hard-coded codec: archives that are not GBK would otherwise fail with
    # UnicodeDecodeError (or have their 8-bit bodies corrupted).
    with open(file_name, 'rb') as mht:
        msg = email.message_from_binary_file(mht)

    # Strip only a trailing '.mht'; str.replace() would also eat '.mht'
    # occurring elsewhere in the path (e.g. in a directory name).
    suffix = '.mht'
    if file_name.endswith(suffix):
        file_path = file_name[:-len(suffix)]
    else:
        file_path = file_name
    out_path = file_path + '.html'

    for part in msg.walk():
        # Multipart containers carry no payload of their own.
        if part.get_content_maintype() == 'multipart':
            continue
        # Parts with a filename are attachments/resources, not the page.
        if part.get_filename():
            continue
        # Compare the MIME type directly rather than relying on the
        # platform-dependent extension table of the mimetypes module.
        if part.get_content_type() != 'text/html':
            continue

        payload = part.get_payload(decode=True)
        if payload is None:
            continue
        try:
            with open(out_path, 'wb') as fp:
                fp.write(payload)
        except OSError as e:
            # Best effort: report the failure and keep going.
            print(e)
|
|