From dfe159bd539e825a974acb5c31932e8c643a4380 Mon Sep 17 00:00:00 2001
From: c6ristian <c6ristian@christian.moe>
Date: Thu, 30 Nov 2023 20:37:36 +0100
Subject: [PATCH] init

---
 convert2.py | 150 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 150 insertions(+)
 create mode 100644 convert2.py

diff --git a/convert2.py b/convert2.py
new file mode 100644
index 0000000..a0b4bea
--- /dev/null
+++ b/convert2.py
@@ -0,0 +1,150 @@
+#!/bin/env python3
+import re
+import yaml
+import shutil
+import click
+from datetime import datetime
+from pathlib import Path
+
+
+def migrate_header_img_and_return_new_path_for_post(img_path, img_new_path):
+    """Copy a local header image next to the post and return its '/blog/...' path.
+
+    Returns None for http(s) URLs, which are not copied.
+    """
+    if re.match(r'https?://', img_path):
+        return None
+    out = Path(img_new_path)
+    # '/x' is relative to 'source'; anything else is relative to the old post dir.
+    rel = img_path if img_path.startswith('/') else f'/posts/{out.parent.name}/{out.name}/{img_path}'
+    new = img_new_path + '/' + Path(img_path).name
+    shutil.copyfile('source' + rel, new)
+    return new.replace(new.split('/')[0], '/blog', 1)  # count=1: leading part only
+
+
+def convert_m2h(input_file: str, output_dir: str):
+    """Rewrite one post's YAML front matter and save it as ``index.md``.
+
+    The text between the first two ``---`` markers of ``input_file`` is parsed
+    as YAML and adjusted: 'date' becomes 'publishDate'/'lastmod', 'tags' is
+    split into a list or dropped when empty, a 'ws' mapping turns the post
+    into an event, 'header_image' becomes 'header', 'author' becomes 'authors'
+    and fields without a counterpart are dropped. ``{...}`` spans are removed
+    from the body and the result is written to ``output_dir + "/index.md"``.
+
+    Args:
+        input_file: Path of the post to convert.
+        output_dir: Existing directory receiving ``index.md`` and the header image.
+
+    Raises:
+        ValueError: If ``input_file`` has no ``---`` delimited front matter.
+    """
+    datetime_parse = re.compile(r"\b\d{4}-\d{2}-\d{2} \d{2}:\d{2}")
+
+    with open(input_file, 'r', encoding='utf-8') as file:
+        contents = file.read()
+
+    # A single search yields the front matter and everything that follows it.
+    parts = re.search(r'---(.*?)---(.*)', contents, re.DOTALL)
+    if parts is None:
+        raise ValueError(f"no YAML front matter found in {input_file}")
+    data = yaml.safe_load(parts.group(1))
+
+    # YAML delivers 'date' as str, datetime or, for date-only values, date;
+    # the latter two both provide isoformat().
+    raw_date = data.pop('date')
+    if isinstance(raw_date, str):
+        data['publishDate'] = date_checker(raw_date)
+    else:
+        data['publishDate'] = raw_date.isoformat()
+    data['lastmod'] = data['publishDate']
+
+    if 'tags' in data:
+        if data['tags'] is None or data['tags'] == []:
+            del data['tags']
+        elif not isinstance(data['tags'], list):
+            data['tags'] = data['tags'].split()
+
+    # A 'ws' mapping marks an event; every other post is an article.
+    if 'ws' in data:
+        event = data.pop('ws')
+        data['categories'] = 'event'
+        if 'where' in event:
+            data['location'] = event['where']
+        if 'when' in event and datetime_parse.match(str(event['when'])):
+            data['date'] = date_checker(event['when'])
+    else:
+        data['categories'] = 'article'
+
+    if 'header_image' in data:
+        new_img_path = migrate_header_img_and_return_new_path_for_post(data.pop('header_image'), output_dir)
+        data['header'] = {'image': new_img_path, 'caption': 'Sorry this blog entry was migrated, there is no Alt-Text'}
+
+    if 'author' in data:
+        data['authors'] = data.pop('author')['display_name'].split(', ')
+
+    # Fields that are dropped without replacement.
+    for obsolete in ('meta', 'published', 'status', 'layout', 'type'):
+        data.pop(obsolete, None)
+
+    data['draft'] = False
+
+    # Non-greedy, so each {...} span is removed on its own; a greedy match would
+    # also delete all text between the first '{' and the last '}' of the body.
+    body = re.sub(r"\{.*?\}", "", parts.group(2), flags=re.DOTALL)
+
+    # Assemble the parts directly; str.replace() could also hit repeated text.
+    new_contents = contents[:parts.start()] + '---\n' + yaml.safe_dump(data) + '---' + body
+
+    with open(output_dir + "/index.md", 'w', encoding='utf-8') as file:
+        file.write(new_contents)
+
+
+def date_checker(new_date: str):
+    """Turn 'YYYY-MM-DD HH:MM[:SS][ TZ]' into a local-time ISO 8601 string.
+
+    TZ is a zone name (%Z) or a numeric offset like +0100 (%z); other shapes give None.
+    """
+    shape = re.match(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}(:\d{2})?( .*)?", str(new_date))
+    if shape is None:
+        return None
+    seconds, zone = shape.groups()
+    fmt = '%Y-%m-%d %H:%M' + (':%S' if seconds else '')
+    if zone is not None:  # a leading sign means a numeric offset, else a zone name
+        fmt += ' %z' if zone[1:2] in ('+', '-') else ' %Z'
+    return datetime.strptime(str(new_date), fmt).astimezone().isoformat()
+
+
+def convert_year(year_path, output):
+    """Convert every '*.m*' file below ``year_path`` into ``output/<post>/index.md``.
+
+    A post inside a 'YYYY-MM-DD-...' directory keeps that directory's name;
+    any other post is named after its file name up to the first '.'.
+    """
+    dated_dir = re.compile(r"\d{4}-\d{2}-\d{2}-.*")  # raw string: '\d' is not a str escape
+    for post in list(year_path.rglob('**/*.m*')):  # snapshot before anything is written
+        name = post.parent.name if dated_dir.match(post.parent.name) else post.name.split('.')[0]
+        output_path = Path(output, name)
+        output_path.mkdir(exist_ok=True)
+        convert_m2h(post, str(output_path))
+
+
+def handle_list_conversion(input_dir, output_dir):
+    """Convert each year directory in ``input_dir`` into ``output_dir/<year>``."""
+    years = [entry for entry in Path(input_dir).iterdir() if entry.is_dir()]
+    for year in years:
+        # Join as path components so ``output_dir`` works without a trailing '/'.
+        output_path = Path(output_dir, year.name)
+        output_path.mkdir(parents=True, exist_ok=True)
+        convert_year(year, str(output_path))
+
+
+@click.command(help="Convert every year directory found in --input into --output.")
+@click.option("--input", required=True, help="Input Path.")
+@click.option("--output", required=True, help="Output Path")
+def click_cli(input, output):  # parameter names are dictated by the option names
+    handle_list_conversion(input, output)
+
+
+if __name__ == "__main__":
+    click_cli()  # click reads --input/--output from the command line