#!/usr/bin/env python3
"""Migrate markdown posts with YAML front matter into per-post ``index.md`` files.

The input directory is expected to contain one sub-directory per group of
posts (the code calls them "years").  Every markdown file found below such a
directory is rewritten to ``<output>/<year>/<post>/index.md`` with converted
front matter; a referenced local header image is copied next to it.

NOTE(review): the function name ``convert_m2h`` and the keys being mapped
suggest a Jekyll-to-Hugo migration, but nothing in this file states that
explicitly.
"""
import re
import shutil
from datetime import date, datetime, timezone
from pathlib import Path

import click
import yaml

# Set by click_cli(); migrate_header_img_and_return_new_path_for_post() reads
# g_input to locate the source images.
g_input = None
g_output = None

# File suffixes treated as posts (compared case-insensitively).
_MARKDOWN_SUFFIXES = frozenset({'.md', '.markdown', '.mdown', '.mkd', '.mkdn'})

# Directory names of the form "YYYY-MM-DD-<anything>".
_DATED_DIR = re.compile(r"\d{4}-\d{2}-\d{2}-.*")

# Group 1: text between the first pair of '---' markers; group 2: the rest.
_FRONT_MATTER = re.compile(r'---(.*?)---(.*)', re.DOTALL)

# "{{ ... }}", "{% ... %}" and plain "{ ... }" spans.  Non-greedy on purpose:
# a greedy "{.*}" would swallow everything between the first "{" and the last
# "}" of a post.
_BRACE_BLOCK = re.compile(r'\{\{.*?\}\}|\{%.*?%\}|\{.*?\}', re.DOTALL)

# Gate for event dates taken from ws.when.
_EVENT_DATETIME = re.compile(r"\b\d{4}-\d{2}-\d{2} \d{2}:\d{2}")

# "YYYY-MM-DD-" fragments removed from the directory name to build the slug.
_SLUG_DATE = re.compile(r'\d{4}-\d{2}-\d{2}-')

# Front matter keys that are dropped without replacement.
_DROPPED_KEYS = ('meta', 'published', 'status', 'layout', 'type')

# strptime formats tried in order by date_checker(); numeric offsets (%z)
# come before zone names (%Z) because "+0100" is not a valid %Z value.
_DATE_FORMATS = (
    '%Y-%m-%d %H:%M:%S %z',
    '%Y-%m-%d %H:%M:%S%z',
    '%Y-%m-%d %H:%M:%S %Z',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M %z',
    '%Y-%m-%d %H:%M%z',
    '%Y-%m-%d %H:%M %Z',
    '%Y-%m-%d %H:%M',
)


def migrate_header_img_and_return_new_path_for_post(img_path, img_new_path):
    """Copy a post's header image into its output directory.

    Args:
        img_path: Value of the post's ``header_image`` field.  A path starting
            with ``/`` is resolved against the parent of the global input
            directory; any other path is resolved against
            ``<g_input>/<year>/<post>/``, where ``<year>`` and ``<post>`` are
            the last two components of ``img_new_path``.
        img_new_path: Output directory of the post.

    Returns:
        The file name of the copied image, or ``None`` when ``img_path`` is
        empty or an ``http(s)://`` URL (nothing is copied in that case).

    Raises:
        OSError: If the source image cannot be copied (``shutil.copyfile``).
        TypeError: If the global ``g_input`` has not been set.
    """
    if not img_path or re.match(r'https?://', img_path):
        return None

    image_name = Path(img_path).name
    bundle_dir = Path(img_new_path)
    if img_path.startswith('/'):
        # Site-absolute path: relative to the directory above the input dir.
        source = Path(g_input).parent / img_path.lstrip('/')
    else:
        source = Path(g_input, bundle_dir.parent.name, bundle_dir.name, img_path)

    shutil.copyfile(source, bundle_dir / image_name)
    return image_name


def _strip_brace_blocks(text):
    """Return *text* with every brace-delimited span removed."""
    return _BRACE_BLOCK.sub("", text)


def _normalise_tags(data):
    """Drop empty ``tags`` or turn a whitespace-separated value into a list."""
    if 'tags' not in data:
        return
    tags = data['tags']
    if tags is None or tags == []:
        del data['tags']
    elif not isinstance(tags, list):
        # "a, b c" -> ["a", "b", "c"]; tokens that were only commas vanish.
        stripped = (token.strip(',') for token in str(tags).split())
        data['tags'] = [token for token in stripped if token]


def _convert_front_matter(data, output_dir):
    """Rewrite the parsed front matter mapping *data* in place."""
    # 'date' becomes 'publishDate' / 'lastmod'.  A missing 'date' raises
    # KeyError, exactly as before.
    raw_date = data.pop('date')
    if isinstance(raw_date, date):
        # Covers datetime too (a subclass of date); date-only values used to
        # leave publishDate unset.
        data['publishDate'] = raw_date.isoformat()
    else:
        data['publishDate'] = date_checker(raw_date)
    data['lastmod'] = data['publishDate']

    _normalise_tags(data)

    if 'ws' in data:
        data['categories'] = 'event'
        event = data.pop('ws')
        if isinstance(event, dict):
            if 'where' in event:
                data['location'] = event['where']
            when = event.get('when')
            if when is not None and _EVENT_DATETIME.match(str(when)):
                event_date = date_checker(when)
                if event_date is not None:
                    data['date'] = event_date
    else:
        data['categories'] = 'article'

    if 'header_image' in data:
        # NOTE(review): the key is removed even when no image was migrated
        # (remote URL) -- confirm this matches the original intent.
        image_name = migrate_header_img_and_return_new_path_for_post(
            data.pop('header_image'), output_dir)
        if image_name is not None:
            data['header'] = {
                'image': image_name,
                'caption': 'Sorry this blog entry was migrated, there is no Alt-Text',
            }

    if 'author' in data:
        author = data.pop('author')
        if isinstance(author, dict):
            author = author.get('display_name')
        if isinstance(author, str):
            data['authors'] = author.split(', ')
        elif isinstance(author, list):
            data['authors'] = author

    data['slug'] = _SLUG_DATE.sub('', Path(output_dir).name) + '-index'

    for key in _DROPPED_KEYS:
        data.pop(key, None)
    data['draft'] = False


def convert_m2h(input_file: str, output_dir: str):
    """Convert one post and write it to ``<output_dir>/index.md``.

    The front matter is the text between the first two ``---`` markers.  It
    is parsed with ``yaml.safe_load``, converted, dumped again with
    ``yaml.safe_dump`` and put back in place; brace-delimited spans are
    removed from the text that follows it.

    Args:
        input_file: Path of the markdown file to read.
        output_dir: Existing directory that receives ``index.md`` (and the
            header image, if any).

    Raises:
        ValueError: If the file has no front matter or it is not a mapping.
        KeyError: If the front matter has no ``date`` field.
    """
    with open(input_file, 'r', encoding='utf-8') as file:
        contents = file.read()

    match = _FRONT_MATTER.search(contents)
    if match is None:
        raise ValueError(f"no '---' delimited front matter found in {input_file}")

    data = yaml.safe_load(match.group(1))
    if not isinstance(data, dict):
        raise ValueError(f"front matter of {input_file} is not a YAML mapping")

    _convert_front_matter(data, output_dir)

    # Rebuild from offsets instead of str.replace(), which would also rewrite
    # any later occurrence of the same text.
    new_front_matter = '\n' + yaml.safe_dump(data)
    new_contents = (
        contents[:match.start(1)]
        + new_front_matter
        + '---'
        + _strip_brace_blocks(match.group(2))
    )

    with open(Path(output_dir) / 'index.md', 'w', encoding='utf-8') as file:
        file.write(new_contents)


def date_checker(new_date):
    """Return *new_date* as an ISO 8601 string in the local time zone.

    Args:
        new_date: A ``datetime`` or a value whose ``str()`` looks like
            ``YYYY-MM-DD HH:MM[:SS]``, optionally followed by a numeric UTC
            offset (``+0100``, ``+01:00``) or a zone name accepted by
            ``strptime``'s ``%Z``.

    Returns:
        ``datetime.astimezone().isoformat()`` of the parsed value, or ``None``
        when no supported format matches.  Values without zone information
        are taken to be local time.
    """
    if isinstance(new_date, datetime):
        return new_date.astimezone().isoformat()

    text = str(new_date).strip()
    for fmt in _DATE_FORMATS:
        try:
            parsed = datetime.strptime(text, fmt)
        except ValueError:
            continue
        if parsed.tzinfo is None and fmt.endswith('%Z'):
            # If %Z left the value naive, a trailing "UTC"/"GMT" would be
            # read as local time below; pin it to UTC instead.
            if text.split()[-1].upper() in ('UTC', 'GMT'):
                parsed = parsed.replace(tzinfo=timezone.utc)
        return parsed.astimezone().isoformat()
    return None


def convert_year(year_path, output):
    """Convert every markdown post found below *year_path*.

    A post whose parent directory is named ``YYYY-MM-DD-...`` is written to
    ``<output>/<parent directory name>``; any other post goes to
    ``<output>/<file name up to the first dot>``.

    Args:
        year_path: ``pathlib.Path`` of the directory to scan recursively.
        output: Directory that receives one sub-directory per post.
    """
    # Materialise the listing first so files written during the run are not
    # picked up when the output lives inside the input tree.
    posts = sorted(
        candidate for candidate in year_path.rglob('*')
        if candidate.is_file() and candidate.suffix.lower() in _MARKDOWN_SUFFIXES
    )
    for post in posts:
        if _DATED_DIR.match(post.parent.name):
            bundle_name = post.parent.name
        else:
            bundle_name = post.name.split('.')[0]
        output_path = Path(output, bundle_name)
        output_path.mkdir(parents=True, exist_ok=True)
        convert_m2h(post, str(output_path))


def handle_list_conversion(input_dir, output_dir):
    """Run convert_year() for every sub-directory of *input_dir*.

    Each sub-directory ``<input_dir>/<name>`` is converted into
    ``<output_dir>/<name>``, which is created when missing.
    """
    for year in sorted(Path(input_dir).iterdir()):
        if not year.is_dir():
            continue
        output_path = Path(output_dir) / year.name
        output_path.mkdir(parents=True, exist_ok=True)
        convert_year(year, str(output_path))


@click.command()
@click.option("--input", required=True, help="Input Path.")
@click.option("--output", required=True, help="Output Path")
def click_cli(input, output):
    """Command-line entry point: convert everything below --input into --output."""
    global g_input
    global g_output
    g_input = input
    g_output = output
    handle_list_conversion(input, output)


if __name__ == "__main__":
    click_cli()