I currently have a script that generates email backups from an email account over an IMAP connection. This script downloads all emails in EML format, also creates subfolders and saves emails in their respective subfolders. I'm looking for a way to generate the PST file from what's been downloaded, and I've tried manipulating Outlook with Win32, but ran into difficulties in the process. Does anyone know a method through a library or using a program to achieve this?
*Code for downloading emails:
import imaplib
import email
from email.header import decode_header
import os
import re
# Configuración de la cuenta de correo
email_address = '[email protected]'
password = 'pass'
server = 'server.serveremail.com'
# Conexión al servidor IMAP de Rackspace
mail = imaplib.IMAP4_SSL(server)
# Inicio de sesión
mail.login(email_address, password)
# Obtiene una lista de todas las carpetas
status, folders = mail.list()
# Carpeta para almacenar los archivos de respaldo
backup_root_folder = 'backup_emails'
backup_folder = ''
os.makedirs(backup_root_folder, exist_ok=True)
# Recorre todas las carpetas y respalda los mensajes de cada una
for folder_info in folders:
print(folder_info)
# Decodifica la información de la carpeta
folder_info_decoded = str(folder_info)
folder_name_regex = re.compile(r'\".\" \"?INBOX(\.([^\"]+))?(\"|\')?')
# Extrae el nombre de la carpeta usando expresiones regulares
match = folder_name_regex.search(folder_info_decoded)
if match:
folder_name = match.group(2) if match.group(2) else "INBOX"
# Verifica si el nombre de la carpeta contiene caracteres especiales
if re.search(r'[<>:"/\\|?*]', folder_name):
print(f"La carpeta '{folder_name}' contiene caracteres especiales y será omitida.")
continue
print("************* SIPASO***** " + folder_name)
# Crea la carpeta de respaldo correspondiente
if folder_name != "INBOX":
folder_name = folder_name.replace(".", os.path.sep)
folder_name = folder_name.replace("'", "")
backup_folder = os.path.join(backup_root_folder, folder_name)
print(backup_folder)
os.makedirs(backup_folder, exist_ok=True)
# Selecciona la carpeta actual
try:
status, _ = mail.select(folder_name)
if status != 'OK':
print(f"No se pudo seleccionar la carpeta {folder_name}. Estado: {status}")
continue
else:
print(status+" si seleccionamos----------------****>>>"+folder_name)
except Exception as e:
print(f"Error al seleccionar la carpeta {folder_name}: {e}")
continue
# Busca todos los mensajes en la carpeta actual
try:
status, messages = mail.uid('search', None, 'ALL')
if status != 'OK':
print(f"No se pudo buscar mensajes en la carpeta {folder_name}. Estado: {status}")
continue
else:
print('si se obtuvieron los mensajes')
except Exception as e:
print(f"Error al buscar mensajes en la carpeta {folder_name}: {e}")
continue
print('*********************************************************************************')
# Descarga cada mensaje y guarda como archivos .eml en la carpeta de respaldo
for msg_id in messages[0].split():
_, msg_data = mail.uid('fetch', msg_id, '(RFC822)')
msg = email.message_from_bytes(msg_data[0][1])
# Decodifica el asunto del mensaje para usarlo como nombre de archivo
if msg.get('Subject'):
subject, encoding = decode_header(msg.get('Subject'))[0]
if isinstance(subject, bytes):
subject = subject.decode(encoding or 'utf-8')
else:
subject = 'Sin asunto'
# Reemplaza los caracteres no permitidos en el nombre del archivo
cleaned_subject = re.sub(r'[<>:"/\\|?*]', '', subject)
# Elimina caracteres específicos no permitidos
invalid_chars = ['', '\uf0fc', '\t']
for char in invalid_chars:
cleaned_subject = cleaned_subject.replace(char, '')
backup_file_path = ''
folder_name = folder_name.replace(".", os.path.sep)
folder_name = folder_name.replace("'", "")
# Guarda el mensaje en un archivo .eml en la carpeta de respaldo
if folder_name != "INBOX":
backup_folder = os.path.join(backup_root_folder, folder_name)
backup_file_path = os.path.join(backup_folder, f'{cleaned_subject}.eml')
else:
backup_file_path = os.path.join(backup_root_folder, f'{cleaned_subject}.eml')
# Imprime el nombre de archivo antes de intentar guardarlo
print("Guardando mensaje como:", backup_file_path)
# Abre el archivo con el argumento 'wb' configurado para manejar todos los caracteres
with open(backup_file_path, 'wb') as backup_file:
backup_file.write(msg_data[0][1])
else:
print(f"No se pudo obtener el nombre de la carpeta:")
# Cierra la conexión
mail.logout()
print("Backup completado. Mensajes guardados en las carpetas de respaldo.")
Failed attempt to create PST file:
import os
import win32com.client
def create_pst_from_eml(source_folder, pst_file_path):
Outlook = win32com.client.Dispatch("Outlook.Application").GetNamespace("MAPI")
if not os.path.exists(pst_file_path):
Outlook.AddStore(pst_file_path)
try:
PSTFolderObj = find_pst_folder(Outlook, pst_file_path)
for root, _, files in os.walk(source_folder):
for file_name in files:
if file_name.endswith('.eml'):
eml_file_path = os.path.join(root, file_name)
with open(eml_file_path, 'rb') as eml_file:
eml_data = eml_file.read()
msg = BytesParser(policy=policy.default).parsebytes(eml_data)
PSTFolderObj.Items.Add(msg)
except Exception as exc:
print(exc)
finally:
Outlook.RemoveStore(PSTFolderObj)
def find_pst_folder(OutlookObj, pst_filepath):
for Store in OutlookObj.Stores:
if Store.IsDataFileStore and Store.FilePath == pst_filepath:
return Store.GetRootFolder()
return None
def enumerate_folders(FolderObj):
for ChildFolder in FolderObj.Folders:
enumerate_folders(ChildFolder)
iterate_messages(FolderObj)
def iterate_messages(FolderObj):
for item in FolderObj.Items:
print("***************************************")
print(item.SenderName)
print(item.SenderEmailAddress)
print(item.SentOn)
print(item.To)
print(item.CC)
print(item.BCC)
print(item.Subject)
count_attachments = item.Attachments.Count
if count_attachments > 0:
for att in range(count_attachments):
print(item.Attachments.Item(att + 1).Filename)
if __name__ == "__main__":
source_folder = r"C:\Users\\Desktop\respaldo\backup_emails"
pst_file_path = r"C:\Users\\Desktop\respaldo\backup_emails.pst"
create_pst_from_eml(source_folder, pst_file_path)
Additional comments: The first script works on Windows and downloads the emails in EML format. The second script attempts to create a PST file from the downloaded EML files, but has encountered problems in the process. I appreciate any suggestions or solutions that can help resolve this issue.