2024-05-18 18:31:30 +02:00
|
|
|
#!/bin/env python
|
|
|
|
import argparse
|
|
|
|
import email
|
|
|
|
import json
|
|
|
|
import smtplib
|
|
|
|
import ssl
|
|
|
|
import sys
|
|
|
|
from datetime import datetime, timezone, timedelta
|
|
|
|
from email.mime.base import MIMEBase
|
|
|
|
from email.mime.multipart import MIMEMultipart
|
|
|
|
from email.mime.text import MIMEText
|
|
|
|
from os import getenv
|
2024-05-19 00:34:36 +02:00
|
|
|
from textwrap import dedent
|
2024-05-20 12:09:04 +02:00
|
|
|
from time import sleep
|
2024-05-18 18:31:30 +02:00
|
|
|
|
|
|
|
import humanize
|
|
|
|
import pgsql
|
|
|
|
|
|
|
|
|
|
|
|
class Config:
    """
    Settings used by hedgedoc-expire.

    The PostgreSQL and SMTP coordinates and the public URL are read from environment variables, each with a
    fallback value. The verbose flag and the two expiry ages start with fixed defaults.
    """

    def __init__(self):
        one_day = timedelta(days=1)

        # Fixed defaults, not read from the environment.
        self.verbose = False
        self.note_age = 95 * one_day
        self.revision_age = 14 * one_day

        # Ports arrive as text and are converted to integers; a non-numeric value raises ValueError.
        postgres_port_text = getenv('POSTGRES_PORT', '5432')
        smtp_port_text = getenv('SMTP_PORT', '587')

        # PostgreSQL connection
        self.postgres_hostname = getenv('POSTGRES_HOSTNAME', 'localhost')
        self.postgres_port = int(postgres_port_text)
        self.postgres_username = getenv('POSTGRES_USERNAME', 'hedgedoc')
        self.postgres_password = getenv('POSTGRES_PASSWORD', 'geheim')
        self.postgres_database = getenv('POSTGRES_DATABASE', 'hedgedoc')

        # SMTP submission
        self.smtp_hostname = getenv('SMTP_HOSTNAME', 'localhost')
        self.smtp_port = int(smtp_port_text)
        self.smtp_username = getenv('SMTP_USERNAME', '')
        self.smtp_password = getenv('SMTP_PASSWORD', '')
        self.smtp_from = getenv('SMTP_FROM', '')

        # Base URL that note links are built from.
        self.url = getenv('URL', 'http://localhost:3000')
|
|
|
|
|
|
|
|
|
|
|
|
class EmailSender:
    """
    Send email messages through SMTP.
    """

    def __init__(self, hostname: str, port: int, username: str, password: str, mail_from: str):
        """
        Remember the SMTP coordinates; no connection is opened here.

        :param hostname: host name of the SMTP server
        :param port: TCP port of the SMTP server
        :param username: user name passed to the SMTP login
        :param password: password passed to the SMTP login
        :param mail_from: sender address; stored for callers, send() itself does not read it
        """
        self.hostname = hostname
        self.port = port
        self.username = username
        self.password = password
        self.mail_from = mail_from

    def send(self, message: email.message.Message) -> None:
        """
        Using the configured SMTP coordinates, send the message out. The code assumes the submission protocol with
        StartTLS enabled, and authentication required.

        The connection is opened per message and always closed again, whether or not sending succeeds.

        :param message: to be sent
        :return:
        :raises Exception: any error raised while connecting, negotiating TLS, logging in or sending; it is
            reported on stderr and then re-raised unchanged
        """
        try:
            context = ssl.create_default_context()
            # The context manager sends QUIT and closes the socket even if one of the steps below fails.
            with smtplib.SMTP(self.hostname, port=self.port) as smtp_server:
                smtp_server.starttls(context=context)
                smtp_server.login(self.username, self.password)
                smtp_server.send_message(message)
        except Exception as e:
            print(f'Unable to send mail through {self}: {e}', file=sys.stderr)
            # Bare raise keeps the original traceback intact.
            raise

    def __str__(self):
        # The password is deliberately left out so that it never ends up in log output.
        return f'EmailSender<{self.hostname},{self.port},{self.username},{self.mail_from}>'
|
2024-05-18 18:31:30 +02:00
|
|
|
|
|
|
|
|
2024-05-20 12:09:04 +02:00
|
|
|
class HedgedocExpire:
    """
    Expire old notes and revisions stored in a HedgeDoc PostgreSQL database.

    Notes that have not been updated for longer than ``config.note_age`` are emailed to their owner and then
    deleted. Revisions created longer ago than ``config.revision_age`` are deleted.
    """

    # Body of the expiry notification. The template is dedented once here and filled in with str.format()
    # afterwards, so that interpolated values (for example a title containing a line break) cannot disturb the
    # dedenting.
    _EXPIRY_BODY = dedent('''\
        You created the note titled "{title}" on {created}.
        It was last updated {updated}, {age} ago. We expire all notes
        that have not been updated within {max_age}.

        Please find attached the contents of the latest revision of your note.

        The admin team for {url}

        ''')

    # Body of the SMTP self-test mail, handled like _EXPIRY_BODY.
    _TEST_BODY = dedent('''\
        This is a test email to confirm proper configuration of the SMTP client.

        The admin team for {url}
        ''')

    def __init__(self, config: 'Config', email_sender: 'EmailSender'):
        """
        :param config: settings (database coordinates, expiry ages, base URL, verbosity)
        :param email_sender: used to deliver the notification mails
        """
        self.config = config
        self.email_sender = email_sender

    @staticmethod
    def email_from_email_or_profile(row) -> str:
        """
        Get the email address of the creator from a database row. If the email column is populated, use that, otherwise
        try to extract it from the login profile. The profile is a JSON object that has an emails array. We're using the
        first address from there.
        :param row: database row as a dict with email and profile columns
        :return: email address
        """
        if row['email'] is not None:
            return row['email']
        profile = json.loads(row['profile'])
        return profile['emails'][0]

    def _note_url(self, row) -> str:
        """
        Build the public URL of a note, preferring its alias over its short id.
        :param row: dict with alias and shortid entries
        :return: URL below the configured base URL
        """
        slug = row['alias'] if row['alias'] is not None else row['shortid']
        return self.config.url + '/' + slug

    @staticmethod
    def _markdown_attachment(title, content) -> MIMEText:
        """
        Wrap the note content in a text/markdown attachment named after the note.

        The content is encoded as UTF-8 so that notes with non-ASCII characters can be serialised. The file name is
        passed as a header parameter, which quotes it and RFC 2231-encodes it when it is not plain ASCII.
        :param title: note title, used for the attachment file name
        :param content: markdown source of the note; None is treated as an empty note
        :return: the attachment part
        """
        attachment = MIMEText(content or '', 'markdown', 'utf-8')
        attachment.add_header('Content-Disposition', 'attachment', filename=f'{title}.md')
        return attachment

    def _expiry_message(self, note) -> MIMEMultipart:
        """
        Compose the notification mail for one expired note, with the note content attached.
        :param note: note dict as produced by notes_to_be_expired()
        :return: the complete message, addressed to the note owner
        """
        # NOTE(review): fromisoformat() implies the driver hands timestamps back as ISO strings - confirm.
        note_age = datetime.now(timezone.utc) - datetime.fromisoformat(note['updatedAt'])
        msg = MIMEMultipart()
        msg['From'] = self.email_sender.mail_from
        msg['To'] = self.email_from_email_or_profile(note)
        msg['Subject'] = f'Your HedgeDoc Note "{note["title"]}" has expired'
        msg.attach(MIMEText(self._EXPIRY_BODY.format(
            title=note['title'],
            created=note['createdAt'],
            updated=note['updatedAt'],
            age=humanize.naturaldelta(note_age),
            max_age=humanize.naturaldelta(self.config.note_age),
            url=self.config.url,
        )))
        msg.attach(self._markdown_attachment(note['title'], note['content']))
        return msg

    def _connect(self):
        """
        Create the database connection from the configured host, port, user name and password.
        :return: a pgsql.Connection, meant to be used as a context manager
        """
        # NOTE(review): config.postgres_database is not passed on here - confirm which database the driver selects.
        return pgsql.Connection((self.config.postgres_hostname, self.config.postgres_port),
                                self.config.postgres_username, self.config.postgres_password)

    def notes_to_be_expired(self, db) -> list[dict]:
        """
        Get a list of all notes to be expired, i.e. notes last updated before now minus config.note_age.
        :param db: the database connection
        :return: one dict per note, ordered by last update, oldest first; alias falls back to the short id
        """
        cutoff = datetime.now(timezone.utc) - self.config.note_age
        with db.prepare('''SELECT
                    "Notes"."alias",
                    "Notes"."content",
                    "Notes"."createdAt",
                    "Notes"."ownerId",
                    "Notes"."shortid",
                    "Notes"."id",
                    "Notes"."title",
                    "Notes"."updatedAt",
                    "Users"."email",
                    "Users"."profile"
                FROM "Notes", "Users"
                WHERE "Notes"."updatedAt" < $1
                    AND "Notes"."ownerId" = "Users"."id"
                ORDER BY "Notes"."updatedAt"
                ''') as notes_older_than:
            return [
                {
                    'alias': row.alias if row.alias is not None else row.shortid,
                    'content': row.content,
                    'createdAt': row.createdAt,
                    'email': row.email,
                    'id': row.id,
                    'ownerId': row.ownerId,
                    'profile': row.profile,
                    'shortid': row.shortid,
                    'title': row.title,
                    'updatedAt': row.updatedAt,
                }
                for row in notes_older_than(cutoff)
            ]

    def revisions_to_be_expired(self, db) -> list[dict]:
        """
        Obtain a list of revisions to be expired, i.e. revisions created before now minus config.revision_age.
        :param db: the database connection
        :return: one dict per revision, ordered by note creation and then revision creation time
        """
        cutoff = datetime.now(timezone.utc) - self.config.revision_age
        with db.prepare('''SELECT
                    "Notes"."alias",
                    "Revisions"."createdAt",
                    "Users"."email",
                    "Users"."profile",
                    "Revisions"."id" as "revisionId",
                    "Notes"."id" as "noteId",
                    "Notes"."shortid" as "shortid",
                    "Notes"."title"
                FROM "Revisions", "Notes", "Users"
                WHERE "Revisions"."createdAt" < $1
                    AND "Revisions"."noteId" = "Notes"."id"
                    AND "Notes"."ownerId" = "Users"."id"
                ORDER BY "Notes"."createdAt", "Revisions"."createdAt"
                ''') as revs_older_than:
            return [
                {
                    'alias': row.alias,
                    'createdAt': row.createdAt,
                    'email': row.email,
                    'noteId': row.noteId,
                    'profile': row.profile,
                    'revisionId': row.revisionId,
                    'shortid': row.shortid,
                    'title': row.title,
                }
                for row in revs_older_than(cutoff)
            ]

    def check_notes_to_be_expired(self, db) -> None:
        """
        Print a list of notes that will be expired.
        :param db: the database connection
        :return:
        """
        cutoff = datetime.now(timezone.utc) - self.config.note_age
        print(f'Notes to be deleted not changed since {cutoff} ({humanize.naturaldelta(self.config.note_age)}):')
        for note in self.notes_to_be_expired(db):
            age = datetime.now(timezone.utc) - datetime.fromisoformat(note['updatedAt'])
            url = self._note_url(note)
            print(f' {self.email_from_email_or_profile(note)} ({humanize.naturaldelta(age)}) {url}: {note["title"]}')

    def check_revisions_to_be_expired(self, db) -> None:
        """
        Print a list of revisions that will be expired, grouped by the note they belong to.
        :param db: the database connection
        :return:
        """
        cutoff = datetime.now(timezone.utc) - self.config.revision_age
        print(f'Revisions to be deleted created before {cutoff} ({humanize.naturaldelta(self.config.revision_age)}):')

        # Group the revisions by note id; dicts keep insertion order, so the query ordering is preserved.
        revisions_by_note = {}
        for row in self.revisions_to_be_expired(db):
            row['age'] = datetime.now(timezone.utc) - datetime.fromisoformat(row['createdAt'])
            revisions_by_note.setdefault(row['noteId'], []).append(row)

        for revisions in revisions_by_note.values():
            # Owner, URL and title are the same for all revisions of a note; take them from the first one.
            first = revisions[0]
            addr = self.email_from_email_or_profile(first)
            print(f' {addr} {self._note_url(first)}: {first["title"]}')
            for rev in revisions:
                print(f' {humanize.naturaldelta(rev["age"])}: {rev["revisionId"]}')

    def expire_old_notes(self, db) -> None:
        """
        Email old notes to their owners, then delete them.

        Each note is handled on its own: if composing, sending or deleting fails, the problem is reported on stderr
        and processing continues with the next note. A note is only deleted after its mail was sent.
        :param db: the database connection
        :return:
        """
        with db.prepare('DELETE FROM "Notes" WHERE "id" = $1') as delete_statement:
            for note in self.notes_to_be_expired(db):
                try:
                    msg = self._expiry_message(note)
                    self.email_sender.send(msg)

                    # email backup of the note sent, now we can delete it
                    delete_statement(note["id"])

                    if self.config.verbose:
                        print(f'Note "{note["title"]}" ({self._note_url(note)}) emailed to {msg["To"]}')
                except Exception as e:
                    # Deliberately broad: one broken note must not stop the expiry of the others.
                    print(f'Unable to expire note "{note["title"]}" (id {note["id"]}): {e}', file=sys.stderr)

    def expire_old_revisions(self, db) -> None:
        """
        Removes all revisions that were created before now minus config.revision_age.
        :param db: the database connection
        :return:
        """
        cutoff = datetime.now(timezone.utc) - self.config.revision_age
        with db.prepare('DELETE FROM "Revisions" WHERE "createdAt" < $1 RETURNING id') as delete:
            # RETURNING yields one row per deleted revision, which is only used for counting.
            rows = list(delete(cutoff))
            if self.config.verbose:
                print(f'Deleted {len(rows)} old revisions')

    def cmd_check(self) -> None:
        """
        Print the revisions and notes that an expire run would remove, without changing anything.
        :return:
        """
        with self._connect() as db:
            self.check_revisions_to_be_expired(db)
            self.check_notes_to_be_expired(db)

    def cmd_emailcheck(self) -> None:
        """
        Send a test mail from the configured sender address to that same address.
        :return:
        """
        msg = MIMEMultipart()
        msg['From'] = self.email_sender.mail_from
        msg['To'] = self.email_sender.mail_from
        msg['Subject'] = 'Hedgedoc Expire: Test Mail'
        msg.attach(MIMEText(self._TEST_BODY.format(url=self.config.url)))
        self.email_sender.send(msg)

    def cmd_expire(self) -> None:
        """
        Delete old revisions, then email and delete old notes.
        :return:
        """
        with self._connect() as db:
            self.expire_old_revisions(db)
            self.expire_old_notes(db)
|
2024-05-18 18:31:30 +02:00
|
|
|
|
|
|
|
|
2024-05-20 12:09:04 +02:00
|
|
|
def main():
    """
    Command line entry point.

    Parses the arguments, builds the configuration from the environment (expiry ages and verbosity come from the
    command line), and runs the requested command. Without a command, ``check`` is run.
    """
    parser = argparse.ArgumentParser(
        prog='hedgedoc-expire',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=dedent('''\
            Remove old notes and revisions from Hedgedoc

            Notes that have not been updated in the specified time will be emailed to the creator and then deleted.
            Revisions of notes that have been created before the specified time will be deleted.
        '''),
        epilog=dedent('''\
            command is one of:
            - check: print a list of revisions and notes to be expired
            - cron: run expire every 24 hours
            - emailcheck: send a test email to the configured sender address
            - expire: expire old revisions and untouched notes

            See https://git.hamburg.ccc.de/CCCHH/hedgedoc-expire
        ''')
    )
    parser.add_argument('-n', '--notes', metavar='DAYS', type=float, default=95,
                        help='remove all notes not changed in these many days')
    parser.add_argument('-r', '--revisions', metavar='DAYS', type=float, default=14,
                        help='remove all revisions created more than these many days ago')
    parser.add_argument('command', choices=['check', 'cron', 'emailcheck', 'expire'], default='check', nargs='?',
                        help='action to perform')
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='print more info while running')
    args = parser.parse_args()

    config = Config()
    config.note_age = timedelta(days=args.notes)
    config.revision_age = timedelta(days=args.revisions)
    config.verbose = args.verbose
    mail = EmailSender(config.smtp_hostname, config.smtp_port, config.smtp_username, config.smtp_password,
                       config.smtp_from)
    hedgedoc_expire = HedgedocExpire(config, mail)

    if args.command == 'check':
        hedgedoc_expire.cmd_check()
    elif args.command == 'cron':
        while True:
            # Always target 02:00 local time on the following calendar day. Adding a full day (instead of aiming
            # for today's 02:00) guarantees that a run finishing shortly after 02:00 is not repeated the same day.
            next_expire = datetime.now().replace(hour=2, minute=0, second=0, microsecond=0) + timedelta(days=1)
            if args.verbose:
                print(f'Next expire execution: {next_expire}')
            seconds = (next_expire - datetime.now()).total_seconds()
            if seconds > 0:
                sleep(seconds)
            hedgedoc_expire.cmd_expire()
    elif args.command == 'emailcheck':
        hedgedoc_expire.cmd_emailcheck()
    elif args.command == 'expire':
        hedgedoc_expire.cmd_expire()
    else:
        # Defensive fallback; argparse already rejects commands outside the choices list.
        parser.print_help()


# Run the command line interface only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|