#!/usr/bin/env python
"""
Expire old HedgeDoc notes and revisions.

Notes that have not been updated for a configurable time are emailed to their
owner and then deleted; revisions older than a configurable time are deleted.
Database and SMTP coordinates are read from environment variables (see Config).
"""
import argparse
import email
import json
import smtplib
import ssl
import sys
from datetime import datetime, timezone, timedelta
from email.mime.base import MIMEBase
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from os import getenv
from textwrap import dedent
from time import sleep
from typing import Any

import humanize
import pgsql


class Config:
    """
    Get config from environment variables
    """

    def __init__(self):
        # These three are overwritten from the command line arguments in main().
        self.verbose = False
        self.revision_age = timedelta(days=14)
        self.note_age = timedelta(days=95)

        self.postgres_hostname = getenv('POSTGRES_HOSTNAME', 'localhost')
        self.postgres_username = getenv('POSTGRES_USERNAME', 'hedgedoc')
        self.postgres_password = getenv('POSTGRES_PASSWORD', 'geheim')
        self.postgres_database = getenv('POSTGRES_DATABASE', 'hedgedoc')
        self.postgres_port = int(getenv('POSTGRES_PORT', '5432'))

        self.smtp_hostname = getenv('SMTP_HOSTNAME', 'localhost')
        self.smtp_port = int(getenv('SMTP_PORT', '587'))
        self.smtp_username = getenv('SMTP_USERNAME', '')
        self.smtp_password = getenv('SMTP_PASSWORD', '')
        self.smtp_from = getenv('SMTP_FROM', '')

        # Base URL of the HedgeDoc instance, used to build links to notes.
        self.url = getenv('URL', 'http://localhost:3000')


class EmailSender:
    """
    Send email message through SMTP
    """

    def __init__(self, hostname: str, port: int, username: str, password: str, mail_from: str):
        self.hostname = hostname
        self.port = port
        self.username = username
        self.password = password
        self.mail_from = mail_from

    def send(self, message: email.message.Message) -> None:
        """
        Using the configured SMTP coordinates, send the message out. The code assumes the submission protocol
        with StartTLS enabled, and authentication required.

        :param message: to be sent
        :return:
        :raises Exception: any error from connecting, StartTLS, login or sending is reported on stdout and re-raised
        """
        try:
            # The context manager issues QUIT and closes the socket, also when a step below fails.
            with smtplib.SMTP(self.hostname, port=self.port) as smtp_server:
                smtp_server.starttls(context=ssl.create_default_context())
                smtp_server.login(self.username, self.password)
                smtp_server.send_message(message)
        except Exception as e:
            print(f'Unable to send mail through {self}: {e}')
            raise

    def __str__(self):
        # The password is deliberately left out, this string ends up in error output.
        return f'EmailSender<{self.hostname},{self.port},{self.username},{self.mail_from}>'


class HedgedocExpire:
    """
    Implements the check, emailcheck and expire commands on top of a Config and an EmailSender.
    """

    def __init__(self, config: Config, email_sender: EmailSender):
        self.config = config
        self.email_sender = email_sender

    @staticmethod
    def email_from_email_or_profile(row) -> str:
        """
        Get the email address of the creator from a database row. If the email column is populated, use that,
        otherwise try to extract it from the login profile. The profile is a JSON object that has an emails array.
        We're using the first address from there.

        :param row: database row as a dict with email and profile columns
        :return: email address
        :raises Exception: if the profile is missing, is not valid JSON, or has no (non-empty) emails array
        """
        if row['email'] is not None:
            return row['email']
        profile = json.loads(row['profile'])
        first = profile['emails'][0]
        # Presumably some login providers store entries as {"value": "<address>"} objects instead of
        # plain strings (verify against the profiles in your database); unwrap those so that the
        # result is always usable as a To: header.
        if isinstance(first, dict) and 'value' in first:
            return first['value']
        return first

    @staticmethod
    def _age(timestamp) -> timedelta:
        """
        Time elapsed between the given timestamp and now (UTC).

        :param timestamp: ISO 8601 string as parsed by datetime.fromisoformat(); a datetime is accepted as-is
        :return: now minus timestamp
        """
        if not isinstance(timestamp, datetime):
            timestamp = datetime.fromisoformat(timestamp)
        return datetime.now(timezone.utc) - timestamp

    def _note_url(self, row) -> str:
        """
        Build the public URL of a note from a row dict with alias and shortid keys.
        """
        slug = row['alias'] if row['alias'] is not None else row['shortid']
        return self.config.url + '/' + slug

    def _connect(self):
        """
        Open a database connection using the configured host, port, username and password.
        """
        # NOTE(review): config.postgres_database is read from the environment but never handed to the
        # driver, so whatever database pgsql selects by default is used. Confirm against the pgsql
        # documentation and pass the database name if a non-default one must be supported.
        return pgsql.Connection((self.config.postgres_hostname, self.config.postgres_port),
                                self.config.postgres_username,
                                self.config.postgres_password)

    def notes_to_be_expired(self, db) -> list[dict[str, Any]]:
        """
        Get a list of all notes to be expired.

        :param db: the database connection
        :return: one dict per note last updated more than config.note_age ago, oldest update first;
                 'alias' falls back to the shortid when the note has no alias
        """
        cutoff = datetime.now(timezone.utc) - self.config.note_age
        with db.prepare('''SELECT
                    "Notes"."alias",
                    "Notes"."content",
                    "Notes"."createdAt",
                    "Notes"."ownerId",
                    "Notes"."shortid",
                    "Notes"."id",
                    "Notes"."title",
                    "Notes"."updatedAt",
                    "Users"."email",
                    "Users"."profile"
                FROM "Notes", "Users"
                WHERE "Notes"."updatedAt" < $1
                    AND "Notes"."ownerId" = "Users"."id"
                ORDER BY "Notes"."updatedAt"
                ''') as notes_older_than:
            return [
                {
                    'alias': row.alias if row.alias is not None else row.shortid,
                    'content': row.content,
                    'createdAt': row.createdAt,
                    'email': row.email,
                    "id": row.id,
                    'ownerId': row.ownerId,
                    'profile': row.profile,
                    'shortid': row.shortid,
                    'title': row.title,
                    'updatedAt': row.updatedAt,
                }
                for row in notes_older_than(cutoff)
            ]

    def revisions_to_be_expired(self, db) -> list[dict[str, Any]]:
        """
        Obtain a list of revisions to be expired.

        :param db: the database connection
        :return: one dict per revision created more than config.revision_age ago, ordered by note
                 creation time, then revision creation time
        """
        cutoff = datetime.now(timezone.utc) - self.config.revision_age
        with db.prepare('''SELECT
                    "Notes"."alias",
                    "Revisions"."createdAt",
                    "Users"."email",
                    "Users"."profile",
                    "Revisions"."id" as "revisionId",
                    "Notes"."id" as "noteId",
                    "Notes"."shortid" as "shortid",
                    "Notes"."title"
                FROM "Revisions", "Notes", "Users"
                WHERE "Revisions"."createdAt" < $1
                    AND "Revisions"."noteId" = "Notes"."id"
                    AND "Notes"."ownerId" = "Users"."id"
                ORDER BY "Notes"."createdAt", "Revisions"."createdAt"
                ''') as revs_older_than:
            return [
                {
                    'alias': row.alias,
                    'createdAt': row.createdAt,
                    'email': row.email,
                    'noteId': row.noteId,
                    'profile': row.profile,
                    'revisionId': row.revisionId,
                    'shortid': row.shortid,
                    'title': row.title,
                }
                for row in revs_older_than(cutoff)
            ]

    def check_notes_to_be_expired(self, db) -> None:
        """
        Print a list of notes that will be expired.

        :param db: the database connection
        :return:
        """
        cutoff = datetime.now(timezone.utc) - self.config.note_age
        print(f'Notes to be deleted not changed since {cutoff} ({humanize.naturaldelta(self.config.note_age)}):')
        for note in self.notes_to_be_expired(db):
            age = self._age(note['updatedAt'])
            url = self._note_url(note)
            print(f' {self.email_from_email_or_profile(note)} ({humanize.naturaldelta(age)}) {url}: {note["title"]}')

    def check_revisions_to_be_expired(self, db) -> None:
        """
        Print a list of revisions that will be expired, grouped by the note they belong to.

        :param db: the database connection
        :return:
        """
        cutoff = datetime.now(timezone.utc) - self.config.revision_age
        print(f'Revisions to be deleted created before {cutoff} ({humanize.naturaldelta(self.config.revision_age)}):')

        # Group by note id; dicts keep insertion order, so the ordering of the query is retained.
        revisions_by_note: dict[Any, list[dict[str, Any]]] = {}
        for row in self.revisions_to_be_expired(db):
            row['age'] = self._age(row['createdAt'])
            revisions_by_note.setdefault(row['noteId'], []).append(row)

        for revisions in revisions_by_note.values():
            # Owner, alias and title are per note, so the first revision is representative.
            first = revisions[0]
            addr = self.email_from_email_or_profile(first)
            url = self._note_url(first)
            print(f' {addr} {url}: {first["title"]}')
            for rev in revisions:
                print(f' {humanize.naturaldelta(rev["age"])}: {rev["revisionId"]}')

    def _build_expiry_mail(self, note, recipient: str) -> MIMEMultipart:
        """
        Compose the notification for an expired note, with the note content attached as markdown.

        :param note: note dict as returned by notes_to_be_expired()
        :param recipient: address the mail is sent to
        :return: the message, ready to be handed to EmailSender.send()
        """
        title = note['title']
        created = note['createdAt']
        updated = note['updatedAt']
        age = humanize.naturaldelta(self._age(updated))
        limit = humanize.naturaldelta(self.config.note_age)

        msg = MIMEMultipart()
        msg['From'] = self.email_sender.mail_from
        msg['To'] = recipient
        msg['Subject'] = f'Your HedgeDoc Note "{title}" has expired'
        msg.attach(MIMEText(dedent(f'''\
            You created the note titled "{title}" on {created}.
            It was last updated {updated}, {age} ago.

            We expire all notes that have not been updated within {limit}.

            Please find attached the contents of the latest revision of your note.

            The admin team for {self.config.url}
            ''')))

        md = MIMEBase('text', 'markdown')
        # Passing the filename as a parameter lets the email package quote it (and RFC 2231 encode
        # non-ASCII titles); a hand-built header breaks on spaces, quotes or semicolons in the title.
        md.add_header('Content-Disposition', 'attachment', filename=f'{title}.md')
        # An explicit charset is required: without it any non-ASCII character in the note makes
        # serialising the message fail, so the note would never be mailed nor deleted.
        md.set_payload(note['content'] or '', charset='utf-8')
        msg.attach(md)
        return msg

    def expire_old_notes(self, db) -> None:
        """
        Email old notes to their owners, then delete them. A note is only deleted after its backup mail
        has been handed to the SMTP server; failures are reported on stderr and the note is kept.

        :param db: the database connection
        :return:
        """
        with db.prepare('DELETE FROM "Notes" WHERE "id" = $1') as delete_statement:
            for note in self.notes_to_be_expired(db):
                # Fallback for the error message in case resolving the address itself fails.
                recipient = note['email']
                try:
                    recipient = self.email_from_email_or_profile(note)
                    self.email_sender.send(self._build_expiry_mail(note, recipient))
                except Exception as e:
                    print(f'Unable to send email to {recipient}: {e}', file=sys.stderr)
                    continue

                url = self._note_url(note)
                try:
                    # email backup of the note sent, now we can delete it
                    delete_statement(note["id"])
                except Exception as e:
                    print(f'Unable to delete note "{note["title"]}" ({url}): {e}', file=sys.stderr)
                    continue

                if self.config.verbose:
                    print(f'Note "{note["title"]}" ({url}) emailed to {recipient}')

    def expire_old_revisions(self, db) -> None:
        """
        Removes all revisions, on all notes, that were created more than config.revision_age ago.

        :param db: the database connection
        :return:
        """
        cutoff = datetime.now(timezone.utc) - self.config.revision_age
        with db.prepare('DELETE FROM "Revisions" WHERE "createdAt" < $1 RETURNING id') as delete:
            # RETURNING yields one row per deleted revision, which gives us the count.
            rows = list(delete(cutoff))
            if self.config.verbose:
                print(f'Deleted {len(rows)} old revisions')

    def cmd_check(self) -> None:
        """
        Print the revisions and notes that an expire run would remove; nothing is changed.
        """
        with self._connect() as db:
            self.check_revisions_to_be_expired(db)
            self.check_notes_to_be_expired(db)

    def cmd_emailcheck(self) -> None:
        """
        Send a test mail from the configured sender address to itself to verify the SMTP settings.
        """
        msg = MIMEMultipart()
        msg['From'] = self.email_sender.mail_from
        msg['To'] = self.email_sender.mail_from
        msg['Subject'] = 'Hedgedoc Expire: Test Mail'
        msg.attach(MIMEText(dedent(f'''\
            This is a test email to confirm proper configuration of the SMTP client.

            The admin team for {self.config.url}
            ''')))
        self.email_sender.send(msg)

    def cmd_expire(self) -> None:
        """
        Delete old revisions, then email and delete old notes.
        """
        with self._connect() as db:
            self.expire_old_revisions(db)
            self.expire_old_notes(db)


def main():
    """
    Parse the command line and run the selected command.
    """
    parser = argparse.ArgumentParser(
        prog='hedgedoc-expire',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=dedent('''\
            Remove old notes and revisions from Hedgedoc

            Notes that have not been updated in the specified time will be emailed to the creator and then deleted.
            Revisions of notes that have been created before the specified time will be deleted.
            '''),
        epilog=dedent('''\
            command is one of:
            - check: print a list of revisions and notes to be expired
            - cron: run expire every 24 hours
            - emailcheck: send a test email to the configured sender address
            - expire: expire old revisions and untouched notes

            See https://git.hamburg.ccc.de/CCCHH/hedgedoc-expire
            ''')
    )
    parser.add_argument('-n', '--notes', metavar='DAYS', type=float, default=95,
                        help='remove all notes not changed in these many days')
    parser.add_argument('-r', '--revisions', metavar='DAYS', type=float, default=14,
                        help='remove all revisions created more than these many days ago')
    parser.add_argument('command', choices=['check', 'cron', 'emailcheck', 'expire'], default='check', nargs='?',
                        help='action to perform')
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='print more info while running')
    args = parser.parse_args()

    config = Config()
    config.note_age = timedelta(days=args.notes)
    config.revision_age = timedelta(days=args.revisions)
    config.verbose = args.verbose
    mail = EmailSender(config.smtp_hostname, config.smtp_port, config.smtp_username, config.smtp_password,
                       config.smtp_from)
    hedgedoc_expire = HedgedocExpire(config, mail)

    if args.command == 'check':
        hedgedoc_expire.cmd_check()
    elif args.command == 'cron':
        while True:
            # Always schedule for 02:00 local time on the following day, then sleep until then.
            next_expire = datetime.now().replace(hour=2, minute=0, second=0, microsecond=0) + timedelta(days=1)
            if args.verbose:
                print(f'Next expire execution: {next_expire}')
            seconds = (next_expire - datetime.now()).total_seconds()
            if seconds > 0:
                sleep(seconds)
            hedgedoc_expire.cmd_expire()
    elif args.command == 'emailcheck':
        hedgedoc_expire.cmd_emailcheck()
    elif args.command == 'expire':
        hedgedoc_expire.cmd_expire()
    else:
        parser.print_help()


if __name__ == '__main__':
    main()