#!/usr/bin/env python3
"""Migrate blog posts with YAML front matter into per-post ``index.md`` files.

For every post found below the input directory the front matter is rewritten
(dates, tags, event data, header image, authors), a fixed set of fields is
dropped, and the result is written to ``<output>/<year>/<post>/index.md``.

Origin: added as ``convert2.py`` by the patch "init"
(commit dfe159bd539e825a974acb5c31932e8c643a4380, c6ristian, 2023-11-30).
"""
import re
import shutil
from datetime import date, datetime, timezone
from pathlib import Path

import click
import yaml

# Front matter is the text between the first two "---" markers; the second
# group captures everything after the closing marker (the post body).
_FRONT_MATTER = re.compile(r'---(.*?)---(.*)', re.DOTALL)

# NOTE(review): greedy and DOTALL, so this removes everything from the first
# "{" to the last "}" of the body, including any text in between. Kept as in
# the original; confirm that this is really the intended stripping.
_BRACED = re.compile(r"{.*}", re.DOTALL)

# A value is only treated as a date string if it starts like this.
_DATE_PREFIX = re.compile(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}")

# Directory names of the form "YYYY-MM-DD-<anything>".
_DATED_DIR = re.compile(r"\d{4}-{1}\d{2}-{1}\d{2}-.*")

# Tried in order; the "%z" variants accept numeric offsets such as "+0100",
# the "%Z" variants accept zone names such as "UTC".
_DATE_FORMATS = (
    '%Y-%m-%d %H:%M:%S %z',
    '%Y-%m-%d %H:%M:%S %Z',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M %z',
    '%Y-%m-%d %H:%M %Z',
    '%Y-%m-%d %H:%M',
)

# Front-matter fields that are removed from every converted post.
_DROPPED_FIELDS = ('meta', 'published', 'status', 'layout', 'type')


def migrate_header_img_and_return_new_path_for_post(img_path, img_new_path):
    """Copy a post's header image next to the converted post.

    Args:
        img_path: Image reference taken from the post's front matter.
        img_new_path: Output directory of the post (``<root>/<year>/<post>``).

    Returns:
        The image path below ``/blog`` (the leading component of
        ``img_new_path`` is replaced by ``/blog``), or ``None`` when
        ``img_path`` is an http(s) URL, in which case nothing is copied.

    Raises:
        OSError: If the image cannot be copied.
    """
    # NOTE(review): remote images are not copied and the caller ends up with
    # ``image: None`` in the new front matter -- confirm this is intended.
    if img_path.startswith(('http://', 'https://')):
        return None

    output_path = Path(img_new_path)
    if img_path.startswith('/'):
        # Site-absolute reference: resolve below the "source" tree.
        old = 'source' + img_path
    else:
        # Relative reference: resolve inside the post's own source directory.
        old = ('source/posts/' + str(output_path.parent.name) + '/'
               + str(output_path.name) + '/' + img_path)
    new = img_new_path + '/' + Path(img_path).name
    shutil.copyfile(old, new)
    # Replace only the leading component; without the count every later
    # occurrence of that text (e.g. inside the post slug) was rewritten too.
    return new.replace(new.split('/')[0], '/blog', 1)


def _publish_date(raw_date, source):
    """Return the ``publishDate`` value for a front-matter ``date`` entry."""
    if isinstance(raw_date, str):
        return date_checker(raw_date)
    # ``datetime`` is a subclass of ``date``; YAML yields a plain ``date``
    # for values without a time part.
    if isinstance(raw_date, date):
        return raw_date.isoformat()
    raise ValueError(f"{source}: unsupported 'date' value {raw_date!r}")


def convert_m2h(input_file: str, output_dir: str):
    """Convert one post and write it to ``<output_dir>/index.md``.

    Args:
        input_file: Path of the post to read.
        output_dir: Existing directory that receives ``index.md`` (and the
            header image, if the post has a local one).

    Raises:
        ValueError: If the file has no ``---`` delimited front matter, the
            front matter is not a YAML mapping, or a date cannot be handled.
        KeyError: If the front matter has no ``date`` entry.
    """
    with open(input_file, 'r', encoding='utf-8') as file:
        contents = file.read()

    match = _FRONT_MATTER.search(contents)
    if match is None:
        raise ValueError(f"{input_file}: no '---' delimited front matter found")
    front_matter, body = match.groups()

    data = yaml.safe_load(front_matter)
    if not isinstance(data, dict):
        raise ValueError(f"{input_file}: front matter is not a YAML mapping")

    # Convert the fields
    data['publishDate'] = _publish_date(data.pop('date'), input_file)
    data['lastmod'] = data['publishDate']

    if 'tags' in data:
        tags = data['tags']
        if tags is None or tags == []:
            del data['tags']
        elif not isinstance(tags, list):
            # A scalar tag list is split on whitespace.
            data['tags'] = str(tags).split()

    if 'ws' in data:
        data['categories'] = 'event'
        ws = data.pop('ws')
        if isinstance(ws, dict):
            if 'where' in ws:
                data['location'] = ws['where']
            # date_checker returns None for values that do not look like
            # "YYYY-MM-DD HH:MM..."; those are ignored.
            event_date = date_checker(ws.get('when'))
            if event_date is not None:
                data['date'] = event_date
    else:
        data['categories'] = 'article'

    if 'header_image' in data:
        new_img_path = migrate_header_img_and_return_new_path_for_post(
            data.pop('header_image'), output_dir)
        data['header'] = {
            'image': new_img_path,
            'caption': 'Sorry this blog entry was migrated, there is no Alt-Text',
        }

    if 'author' in data:
        author = data.pop('author')
        name = author.get('display_name') if isinstance(author, dict) else author
        if name:
            data['authors'] = str(name).split(', ')

    # delete fields
    for field in _DROPPED_FIELDS:
        data.pop(field, None)

    data['draft'] = False

    new_body = _BRACED.sub("", body)

    # Rebuild the file from the matched spans instead of str.replace(), which
    # misfires when the front-matter or body text occurs more than once.
    new_contents = (contents[:match.start()]
                    + '---\n' + yaml.safe_dump(data) + '---'
                    + new_body)

    # Write the new contents to the file
    with open(Path(output_dir) / 'index.md', 'w', encoding='utf-8') as file:
        file.write(new_contents)


def date_checker(new_date: str):
    """Normalise a date value to an ISO 8601 string in the local timezone.

    Accepted strings are ``YYYY-MM-DD HH:MM[:SS]``, optionally followed by a
    numeric UTC offset or a zone name. ``datetime`` objects are accepted too.

    Args:
        new_date: The value to convert.

    Returns:
        The ISO 8601 representation, or ``None`` when the value does not
        start with ``YYYY-MM-DD HH:MM``.

    Raises:
        ValueError: If the value starts like a date but cannot be parsed.
    """
    if isinstance(new_date, datetime):
        return new_date.astimezone().isoformat()

    text = str(new_date).strip()
    if not _DATE_PREFIX.match(text):
        return None

    for fmt in _DATE_FORMATS:
        try:
            parsed = datetime.strptime(text, fmt)
        except ValueError:
            continue
        # "%Z" only validates the zone name and yields a naive datetime, so
        # "... UTC" would otherwise be interpreted as local time.
        if (parsed.tzinfo is None and fmt.endswith('%Z')
                and text.split()[-1].upper() in ('UTC', 'GMT')):
            parsed = parsed.replace(tzinfo=timezone.utc)
        return parsed.astimezone().isoformat()

    raise ValueError(f"unsupported date format: {text!r}")


def convert_year(year_path, output):
    """Convert every post found below one year directory.

    Args:
        year_path: ``pathlib.Path`` of the year directory to scan.
        output: Directory that receives one sub-directory per post.
    """
    # '*.m*' matches any name containing ".m", e.g. ".md" and ".markdown".
    for post in year_path.rglob('**/*.m*'):
        if not post.is_file():
            continue
        if _DATED_DIR.match(post.parent.name):
            # Post lives in its own dated directory: reuse that name.
            post_name = post.parent.name
        else:
            # Otherwise use the file name up to its first dot.
            post_name = post.name.split('.')[0]
        output_path = Path(output, post_name)
        output_path.mkdir(parents=True, exist_ok=True)
        convert_m2h(post, str(output_path))


def handle_list_conversion(input_dir, output_dir):
    """Convert all year directories found directly inside ``input_dir``.

    Args:
        input_dir: Directory containing one sub-directory per year.
        output_dir: Directory below which ``<year>/<post>/index.md`` files
            are created.
    """
    for year in Path(input_dir).iterdir():
        if not year.is_dir():
            continue
        # Join with a real path separator; plain string concatenation only
        # worked when output_dir happened to end with "/".
        output_path = Path(output_dir) / year.name
        output_path.mkdir(parents=True, exist_ok=True)
        convert_year(year, str(output_path))


@click.command()
@click.option("--input", required=True, help="Input Path.")
@click.option("--output", required=True, help="Output Path")
def click_cli(input, output):
    """Convert all posts below --input and write them below --output."""
    handle_list_conversion(input, output)


if __name__ == "__main__":
    click_cli()