How to parse the .mbox format with Delphi to delete mail attachments?

155 views Asked by At

I would like to keep an archive of my e-mail conversations from the mailbox, but remove unnecessary attachments (which are usually graphics and text files) to make the archive smaller. I wrote a simple parser that goes through all downloaded mailboxes in a folder and removes anything that appears to be an attachment.

The code is very simple, and I think it can be done better. Besides, I don't know if I'm making a mistake or damaging the mbox file structure. Maybe there is a simpler method? Could anyone take a look at what I wrote?

  • is there a simpler way (maybe outside Delphi, maybe some ready-made library)?
  • can such code lead to irreversible damage to mbox format?
  • can I lose some e-mails because of such code?

Thanks a lot for any tips.

program ParseMailToClearTXT;

{$APPTYPE CONSOLE}

uses
  SysUtils, StrUtils, Classes;

const mboxpath = 'e:\Downloads\Takeout\';

var FileIn, FileOut: TextFile;
    FileList: TstringList;
    Line, N_tmp: String;
    i: integer;

{ Appends to FileList the name (without path) of every non-directory entry
  found directly inside Path.

  Path     - folder to scan; it is concatenated with '*.*' as-is, so it must
             already end with a path delimiter.
  FileList - receives the names; existing items are kept. }
procedure ListFileDir(Path: string; FileList: TStrings);
var
  SR: TSearchRec;
begin
  if FindFirst(Path + '*.*', faAnyFile, SR) = 0 then
  try
    repeat
      { Attr is a bit mask: a directory can carry further flags (archive,
        read-only, hidden, ...), so the directory bit has to be tested with
        "and" - comparing the whole value with faDirectory lets such
        directories slip through as files. }
      if (SR.Attr and faDirectory) = 0 then
        FileList.Add(SR.Name);
    until FindNext(SR) <> 0;
  finally
    { Release the search handle even when FileList.Add raises. }
    FindClose(SR);
  end;
end;

{ True when S contains one of the Content-Type headers this tool treats as
  the start of an attachment part. The match is case-sensitive. }
function IsAttachmentHeader(const S: string): Boolean;
begin
  Result := (Pos('Content-Type: image', S) > 0)
    or (Pos('Content-Type: application', S) > 0)
    or (Pos('Content-Type: text/plain; charset=utf-8', S) > 0)
    or (Pos('Content-Type: text/plain; charset="UTF-8"', S) > 0)
    or (Pos('Content-Type: audio/mpeg', S) > 0);
end;

{ True when S looks like a closing MIME boundary: it contains a run of six
  dashes and ends with '--'. }
function IsClosingBoundary(const S: string): Boolean;
begin
  Result := (Pos('------', S) > 0) and (RightStr(S, 2) = '--');
end;

{ True when S starts with 'From ', the line that opens a message in an mbox
  file. }
function IsMessageStart(const S: string): Boolean;
begin
  Result := Copy(S, 1, 5) = 'From ';
end;

{ Reads the next line of Source into S. Returns False, leaving S untouched,
  when Source is already at end of file. }
function NextLine(var Source: TextFile; var S: string): Boolean;
begin
  Result := not Eof(Source);
  if Result then
    ReadLn(Source, S);
end;

{ Consumes lines up to and including the first one shorter than three
  characters (treated as a blank line), or up to end of file. S holds the
  last line read; the result is True when at least one line was read. }
function SkipToShortLine(var Source: TextFile; var S: string): Boolean;
begin
  Result := False;
  while NextLine(Source, S) do
  begin
    Result := True;
    if Length(S) < 3 then
      Break;
  end;
end;

{ Consumes lines up to and including the first closing boundary, the first
  line of the next message, or end of file. S holds the last line read; the
  result is True when at least one line was read. }
function SkipToPartEnd(var Source: TextFile; var S: string): Boolean;
begin
  Result := False;
  while NextLine(Source, S) do
  begin
    Result := True;
    if IsClosingBoundary(S) or IsMessageStart(S) then
      Break;
  end;
end;

{ Copies the mailbox SourceName to TargetName, dropping the content that
  follows attachment-like Content-Type headers and everything between a
  '------' line and the matching closing boundary. Both files are closed
  even when an I/O error is raised part-way through.

  NOTE(review): the '------' rule is a heuristic; any line containing six
  dashes triggers it, not only MIME boundaries - confirm this is intended. }
procedure StripAttachments(const SourceName, TargetName: string);
var
  Src, Dst: TextFile;
  Current: string;
  Pending: Boolean; { Current holds a line that has not been written yet }
begin
  AssignFile(Src, SourceName);
  AssignFile(Dst, TargetName);
  Reset(Src);
  try
    Rewrite(Dst);
    try
      { Test for end of file before reading, so an empty mailbox yields an
        empty output instead of a spurious blank line. }
      while NextLine(Src, Current) do
      begin
        Pending := True;

        if IsAttachmentHeader(Current) then
        begin
          { Keep the Content-Type line, drop the remaining part headers. }
          WriteLn(Dst, Current);
          Pending := False;
          SkipToShortLine(Src, Current);
          { Keep the first line of the part body, drop the rest of it. The
            short line that ended the skip is written further below. }
          if NextLine(Src, Current) then
          begin
            WriteLn(Dst, Current);
            Pending := SkipToShortLine(Src, Current);
          end;
        end;

        if Pending and (Pos('------', Current) > 0) then
        begin
          WriteLn(Dst, Current);
          { Stopping at the next 'From ' line as well keeps an unterminated
            multipart message from swallowing every message after it. }
          Pending := SkipToPartEnd(Src, Current);
        end;

        { Pending is False only when the file ended right after a line that
          was already written; writing it again would duplicate it. }
        if Pending then
          WriteLn(Dst, Current);
      end;
    finally
      CloseFile(Dst);
    end;
  finally
    CloseFile(Src);
  end;
end;

{ Processes every *.mbox file in mboxpath, writing the stripped copy next to
  it as <name>out.txt. }
begin
  FileList := TStringList.Create;
  try
    ListFileDir(mboxpath, FileList);

    for i := 0 to FileList.Count - 1 do
      { Match on the extension: a plain substring search for '.mbox' would
        also pick up the '*.mboxout.txt' files written by an earlier run. }
      if SameText(ExtractFileExt(FileList[i]), '.mbox') then
      begin
        { The name comes straight from the directory listing, so it is used
          verbatim; trimming it could only produce a non-existent path. }
        N_tmp := mboxpath + FileList[i];
        StripAttachments(N_tmp, N_tmp + 'out.txt');
      end;
  finally
    FileList.Free;
  end;
end.
0

There are 0 answers