hedgedoc-expire/hedgedoc-expire.py

353 lines
15 KiB
Python
Raw Normal View History

2024-05-18 18:31:30 +02:00
#!/usr/bin/env python
import argparse
import email
import json
import smtplib
import ssl
import sys
from datetime import datetime, timezone, timedelta
from email.mime.base import MIMEBase
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from os import getenv
2024-05-19 00:34:36 +02:00
from textwrap import dedent
from time import sleep
2024-05-18 18:31:30 +02:00
import humanize
import psycopg
from psycopg.rows import dict_row
2024-05-18 18:31:30 +02:00
class Config:
    """
    Runtime settings for hedgedoc-expire.

    Database, SMTP and URL settings are read from environment variables, each with a
    fallback default. The remaining options start with built-in defaults and are plain
    attributes that can be reassigned after construction.
    """

    def __init__(self):
        # Settings taken from the environment.
        self.postgres_connection_string = getenv(
            'POSTGRES_CONNSTRING',
            'postgresql://hedgedoc:geheim@localhost:5432/hedgedoc',
        )
        self.url = getenv('URL', 'http://localhost:3000')

        self.smtp_hostname = getenv('SMTP_HOSTNAME', 'localhost')
        port_text = getenv('SMTP_PORT', '587')
        self.smtp_port = int(port_text)
        self.smtp_username = getenv('SMTP_USERNAME', '')
        self.smtp_password = getenv('SMTP_PASSWORD', '')
        self.smtp_from = getenv('SMTP_FROM', '')

        # Built-in defaults.
        self.exclude = list()
        self.note_age = timedelta(days=95)
        self.revision_age = timedelta(weeks=2)
        self.verbose = False
class EmailSender:
    """
    Send email message through SMTP
    """

    def __init__(self, hostname: str, port: int, username: str, password: str, mail_from: str):
        """
        :param hostname: SMTP server host name
        :param port: SMTP server port
        :param username: user name used to log in to the SMTP server
        :param password: password used to log in to the SMTP server
        :param mail_from: sender address; stored here for callers to put into their messages
        """
        self.hostname = hostname
        self.port = port
        self.username = username
        self.password = password
        self.mail_from = mail_from

    def send(self, message: email.message.Message) -> None:
        """
        Using the configured SMTP coordinates, send the message out. The code assumes the submission protocol with
        StartTLS enabled, and authentication required.
        :param message: to be sent
        :return:
        :raises Exception: whatever connecting, StartTLS, login or sending raised; it is reported on stdout first
        """
        try:
            # The context manager issues QUIT and closes the socket on success and on error alike,
            # so a failed login or send no longer leaks the connection.
            with smtplib.SMTP(self.hostname, port=self.port) as smtp_server:
                smtp_server.starttls(context=ssl.create_default_context())
                smtp_server.login(self.username, self.password)
                smtp_server.send_message(message)
        except Exception as e:
            print(f'Unable to send mail through {self}: {e}')
            raise
        print(f'Report email to {message["To"]} sent successfully.')

    def __str__(self):
        # The password is deliberately left out: this text ends up in error output.
        return f'EmailSender<{self.hostname},{self.port},{self.username},{self.mail_from}>'
2024-05-18 18:31:30 +02:00
class HedgedocExpire:
    """
    Report on and delete HedgeDoc notes and revisions that are older than the configured ages.
    """

    def __init__(self, config: 'Config', email_sender: 'EmailSender'):
        """
        :param config: settings; note_age, revision_age, exclude, verbose, url and
            postgres_connection_string are read from it
        :param email_sender: used to send the check report and the note backups
        """
        self.config = config
        self.email_sender = email_sender

    @staticmethod
    def email_from_email_or_profile(row) -> str:
        """
        Get the email address of the creator from a database row. If the email column is populated, use that, otherwise
        try to extract it from the login profile. The profile is a JSON object that has an emails array. We're using the
        first address from there.
        :param row: database row as a dict with email and profile columns
        :return: email address
        :raises Exception: if the email column is empty and the profile cannot be parsed or has no emails entry
        """
        if row['email'] is not None:
            return row['email']
        profile = json.loads(row['profile'])
        return profile['emails'][0]

    @staticmethod
    def _attachment_filename(title) -> str:
        """
        Derive a safe attachment file name from a note title.
        :param title: the note title, may be None or empty
        :return: an ASCII-only file name ending in .md; "note.md" if nothing usable is left of the title
        """
        ascii_title = (title or '').encode('ascii', 'ignore').decode('ascii')
        # Control characters would corrupt the header line and path separators confuse mail clients.
        cleaned = ''.join('_' if (not ch.isprintable() or ch in '/\\') else ch for ch in ascii_title).strip()
        return f'{cleaned or "note"}.md'

    def _note_url(self, row) -> str:
        """
        Build the URL of a note below the configured base URL, preferring the alias over the short id.
        :param row: database row as a dict with alias and shortid columns
        :return: the URL of the note
        """
        slug = row['alias'] if row['alias'] is not None else row['shortid']
        return self.config.url + '/' + slug

    def notes_to_be_expired(self, conn) -> list[dict]:
        """
        Get a list of all notes to be expired, i.e. notes last updated before now minus the configured note age.
        Notes are joined with their owner, so notes without a matching user row are not returned.
        :param conn: the database connection
        :return: one dict per note, oldest update first, including the owner's email and profile columns
        """
        cutoff = datetime.now(timezone.utc) - self.config.note_age
        with conn.cursor(row_factory=dict_row) as cur:
            cur.execute('''SELECT
                    "Notes"."alias",
                    "Notes"."content",
                    "Notes"."createdAt",
                    "Notes"."ownerId",
                    "Notes"."shortid",
                    "Notes"."id",
                    "Notes"."title",
                    "Notes"."updatedAt",
                    "Users"."email",
                    "Users"."profile"
                FROM "Notes", "Users"
                WHERE "Notes"."updatedAt" < %s
                    AND "Notes"."ownerId" = "Users"."id"
                ORDER BY "Notes"."updatedAt"
                ''', [cutoff])
            return cur.fetchall()

    def revisions_to_be_expired(self, conn) -> list[dict]:
        """
        Obtain a list of revisions to be expired, i.e. revisions created before now minus the configured
        revision age. Revisions are joined with their note and the note's owner.
        :param conn: the database connection
        :return: one dict per revision, ordered by note creation time, then revision creation time
        """
        cutoff = datetime.now(timezone.utc) - self.config.revision_age
        with conn.cursor(row_factory=dict_row) as cur:
            cur.execute('''SELECT
                    "Notes"."alias",
                    "Revisions"."createdAt",
                    "Users"."email",
                    "Users"."profile",
                    "Revisions"."id" as "revisionId",
                    "Notes"."id" as "noteId",
                    "Notes"."shortid" as "shortid",
                    "Notes"."title"
                FROM "Revisions", "Notes", "Users"
                WHERE "Revisions"."createdAt" < %s
                    AND "Revisions"."noteId" = "Notes"."id"
                    AND "Notes"."ownerId" = "Users"."id"
                ORDER BY "Notes"."createdAt", "Revisions"."createdAt"
                ''', [cutoff])
            return cur.fetchall()

    def check_notes_to_be_expired(self, conn) -> str:
        """
        Return a list of notes that will be expired.
        :param conn: the database connection
        :return: a multi-line text suitable for humans to read
        """
        now = datetime.now(timezone.utc)
        cutoff = now - self.config.note_age
        lines = [
            f'Notes to be deleted not changed since {cutoff} ({humanize.naturaldelta(self.config.note_age)}):\n'
        ]
        for note in self.notes_to_be_expired(conn):
            age = now - note['updatedAt']
            lines.append(
                f' {self.email_from_email_or_profile(note)} ({humanize.naturaldelta(age)}) '
                f'{self._note_url(note)}: {note["title"]}\n'
            )
        return ''.join(lines)

    def check_revisions_to_be_expired(self, conn) -> str:
        """
        Return a list of revisions that will be expired, grouped by the note they belong to.
        :param conn: the database connection
        :return: a multi-line text suitable for humans to read
        """
        now = datetime.now(timezone.utc)
        cutoff = now - self.config.revision_age
        lines = [
            f'Revisions to be deleted created before {cutoff} '
            f'({humanize.naturaldelta(self.config.revision_age)}):\n'
        ]
        # Group the rows by note; dicts keep insertion order, so the query's ordering is preserved.
        revisions_by_note = {}
        for row in self.revisions_to_be_expired(conn):
            row['age'] = now - row['createdAt']
            revisions_by_note.setdefault(row['noteId'], []).append(row)
        for revisions in revisions_by_note.values():
            first = revisions[0]
            lines.append(f' {self.email_from_email_or_profile(first)} {self._note_url(first)}: {first["title"]}\n')
            lines.extend(f' {humanize.naturaldelta(rev["age"])}: {rev["revisionId"]}\n' for rev in revisions)
        return ''.join(lines)

    def expire_old_notes(self, conn) -> None:
        """
        Email old notes to their owners, then delete them. A note is only deleted after its backup email was
        sent. A failure for one note is reported on stderr and does not stop the remaining notes.
        :param conn: the database connection
        :return:
        """
        with conn.cursor() as cur:
            for note in self.notes_to_be_expired(conn):
                # Resolved inside the try block; the fallback keeps the error report itself from failing
                # when the address lookup is what went wrong.
                recipient = 'unknown recipient'
                try:
                    recipient = self.email_from_email_or_profile(note)
                    note_age = datetime.now(timezone.utc) - note['updatedAt']
                    msg = MIMEMultipart()
                    msg['From'] = self.email_sender.mail_from
                    msg['To'] = recipient
                    msg['Subject'] = f'Your HedgeDoc Note "{note["title"]}" has expired'
                    body = (
                        f'You created the note titled "{note["title"]}" on {note["createdAt"]}.\n'
                        f'It was last updated {note["updatedAt"]}, {humanize.naturaldelta(note_age)} ago. '
                        f'We expire all notes\n'
                        f'that have not been updated within {humanize.naturaldelta(self.config.note_age)}.\n'
                        f'Please find attached the contents of the latest revision of your note.\n'
                        f'The admin team for {self.config.url}\n'
                    )
                    msg.attach(MIMEText(body, 'plain', 'utf-8'))
                    md = MIMEText(note["content"], 'markdown', 'utf-8')
                    # Passing the name as a parameter lets the email package quote it when needed.
                    md.add_header('Content-Disposition', 'attachment',
                                  filename=self._attachment_filename(note['title']))
                    msg.attach(md)
                    self.email_sender.send(msg)

                    # email backup of the note sent, now we can delete it
                    cur.execute('DELETE FROM "Notes" WHERE "id" = %s', [note["id"]])
                    conn.commit()

                    if self.config.verbose:
                        print(f'Note "{note["title"]}" ({self._note_url(note)}) emailed to {recipient}')
                except Exception as e:
                    # A failed statement leaves the transaction aborted; roll back so that the
                    # remaining notes can still be deleted.
                    conn.rollback()
                    print(f'Unable to send email to {recipient}: {e}', file=sys.stderr)

    def expire_old_revisions(self, conn) -> None:
        """
        Removes all revisions, on all notes, that were created before now minus the configured revision age.
        :param conn: the database connection
        :return:
        """
        cutoff = datetime.now(timezone.utc) - self.config.revision_age
        with conn.cursor() as cur:
            rows = list(cur.execute('DELETE FROM "Revisions" WHERE "createdAt" < %s RETURNING id', [cutoff]))
            if self.config.verbose:
                print(f'Deleted {len(rows)} old revisions')
            conn.commit()

    def cmd_check(self) -> None:
        """
        Print which revisions and notes would be expired, skipping the types listed in config.exclude.
        :return:
        """
        with psycopg.connect(self.config.postgres_connection_string) as conn:
            if 'revision' not in self.config.exclude:
                print(self.check_revisions_to_be_expired(conn))
            elif self.config.verbose:
                print("Revisions were excluded from check, not checking.\n")
            if 'note' not in self.config.exclude:
                print(self.check_notes_to_be_expired(conn))
            elif self.config.verbose:
                print("Notes were excluded from check, not checking.\n")

    def cmd_emailcheck(self) -> None:
        """
        Build the same report as cmd_check and email it from the configured sender address to that same address.
        :return:
        """
        with psycopg.connect(self.config.postgres_connection_string) as conn:
            report = ''
            if 'revision' not in self.config.exclude:
                report += self.check_revisions_to_be_expired(conn)
            else:
                report += "Revisions were excluded from check.\n"
            if 'note' not in self.config.exclude:
                report += self.check_notes_to_be_expired(conn)
            else:
                report += "Notes were excluded from check.\n"
        # The database is no longer needed once the report text exists.
        msg = MIMEMultipart()
        msg['From'] = self.email_sender.mail_from
        msg['To'] = self.email_sender.mail_from
        msg['Subject'] = 'Hedgedoc Expire: Report'
        msg.attach(MIMEText(
            'This report shows which notes and revisions would be deleted if expire would be run now.\n'
            + report
            + f'The admin team for {self.config.url}\n'
        ))
        self.email_sender.send(msg)

    def cmd_expire(self) -> None:
        """
        Delete old revisions and expire old notes, skipping the types listed in config.exclude.
        :return:
        """
        with psycopg.connect(self.config.postgres_connection_string) as conn:
            if 'revision' not in self.config.exclude:
                self.expire_old_revisions(conn)
            elif self.config.verbose:
                print("Revisions were excluded from action, not expiring.\n")
            if 'note' not in self.config.exclude:
                self.expire_old_notes(conn)
            elif self.config.verbose:
                print("Notes were excluded from action, not expiring.\n")
2024-05-18 18:31:30 +02:00
def main():
    """
    Command-line entry point: parse the arguments, build the configuration and run the chosen command.

    The note and revision ages come from the command line, everything else from the environment via
    Config. Negative ages are rejected, because they would put the cutoff in the future and thereby
    select every note or revision for deletion.
    """
    parser = argparse.ArgumentParser(
        prog='hedgedoc-expire',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=dedent('''\
            Remove old notes and revisions from Hedgedoc
            Notes that have not been updated in the specified time will be emailed to the creator and then deleted.
            Revisions of notes that have been created before the specified time will be deleted.
            '''),
        epilog=dedent('''\
            command is one of:
            - check: print a list of revisions and notes to be expired
            - cron: run expire every 24 hours
            - emailcheck: send an email from the configured sender to themselves with the check report
            - expire: expire old revisions and untouched notes
            See https://git.hamburg.ccc.de/CCCHH/hedgedoc-expire
            ''')
    )
    parser.add_argument('-n', '--notes', metavar='DAYS', type=float, default=95,
                        help='remove all notes not changed in these many days')
    parser.add_argument('-r', '--revisions', metavar='DAYS', type=float, default=14,
                        help='remove all revisions created more than these many days ago')
    parser.add_argument('command', choices=['check', 'cron', 'emailcheck', 'expire'], default='check', nargs='?',
                        help='action to perform')
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='print more info while running')
    parser.add_argument('--exclude', nargs="+", choices=['revision', 'note'],
                        help='Type ("revision" or "note") to exclude from the action.')

    args = parser.parse_args()
    if args.notes < 0 or args.revisions < 0:
        parser.error('DAYS must not be negative')

    config = Config()
    config.note_age = timedelta(days=args.notes)
    config.revision_age = timedelta(days=args.revisions)
    config.verbose = args.verbose
    if args.exclude:
        config.exclude = args.exclude
        if config.verbose:
            print(f'Excluded from action: {", ".join(config.exclude)}')

    mail = EmailSender(config.smtp_hostname, config.smtp_port, config.smtp_username, config.smtp_password,
                       config.smtp_from)
    hedgedoc_expire = HedgedocExpire(config, mail)

    if args.command == 'cron':
        while True:
            # Naive local time: the next run is at 02:00 on the following calendar day.
            next_expire = datetime.now().replace(hour=2, minute=0, second=0, microsecond=0) + timedelta(days=1)
            if args.verbose:
                print(f'Next expire execution: {next_expire}')
            seconds = (next_expire - datetime.now()).total_seconds()
            if seconds > 0:
                sleep(seconds)
            hedgedoc_expire.cmd_expire()
    elif args.command == 'check':
        hedgedoc_expire.cmd_check()
    elif args.command == 'emailcheck':
        hedgedoc_expire.cmd_emailcheck()
    elif args.command == 'expire':
        hedgedoc_expire.cmd_expire()


# Run the command-line interface only when executed as a script, not when imported.
if __name__ == '__main__':
    main()