2023-11-30 20:37:36 +01:00
|
|
|
#!/bin/env python3
|
|
|
|
import re
|
|
|
|
import yaml
|
|
|
|
import shutil
|
|
|
|
import click
|
|
|
|
from datetime import datetime
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
def migrate_header_img_and_return_new_path_for_post(img_path, img_new_path):
    """Copy a post's header image into the migrated post directory.

    The source location is derived from the module-level ``g_input`` global
    (assigned by ``click_cli``):

    * an ``img_path`` starting with ``/`` is resolved against the parent
      directory of ``g_input``;
    * any other ``img_path`` is resolved against
      ``g_input/<parent dir name>/<post dir name>/`` where the two directory
      names are the last two components of ``img_new_path``.

    Args:
        img_path: Value of the old ``header_image`` front-matter field.
        img_new_path: Directory (as a string) the migrated post is written to.

    Returns:
        ``None`` when ``img_path`` is an ``http(s)://`` URL (nothing is
        copied). Otherwise the path of the copied file with its first path
        component replaced by ``/blog``.

    Raises:
        OSError: If the image cannot be copied (e.g. the source is missing).
    """
    # Remote images are not copied; the caller receives None for them.
    if re.match(r'https?://', img_path):
        return None

    post_dir = Path(img_new_path)
    if img_path.startswith('/'):
        # Site-absolute reference: anchor it at the parent of the input dir.
        source = str(Path(g_input).parent) + img_path
    else:
        # Joining through Path works whether or not g_input ends with a
        # separator; plain string concatenation silently produced a wrong
        # path when the trailing slash was missing.
        source = Path(g_input, post_dir.parent.name, post_dir.name, img_path)

    target = img_new_path + '/' + Path(img_path).name
    shutil.copyfile(source, target)

    # Swap only the leading path component. Without the count, every later
    # occurrence of that text (for instance inside the post's slug) was
    # rewritten as well.
    return target.replace(target.split('/')[0], '/blog', 1)
|
|
|
|
|
|
|
|
|
|
|
|
def convert_m2h(input_file: str, output_dir: str):
    """Rewrite one post's front matter and write it to ``output_dir/index.md``.

    The first ``---`` delimited section of ``input_file`` is parsed as YAML
    and transformed as follows:

    * ``date`` becomes ``publishDate`` and ``lastmod``;
    * ``tags`` is dropped when empty, or split into a list when it is a string;
    * ``ws`` marks the post as category ``event`` (``where`` becomes
      ``location``, a parsable ``when`` becomes ``date``); any other post is
      category ``article``;
    * ``header_image`` is copied via
      ``migrate_header_img_and_return_new_path_for_post`` and stored under
      ``header``;
    * ``author.display_name`` is split on ``", "`` into ``authors``;
    * ``meta``, ``published``, ``status``, ``layout`` and ``type`` are removed;
    * ``draft`` is set to ``False``.

    Args:
        input_file: Path of the post to read.
        output_dir: Existing directory that receives ``index.md``.

    Raises:
        ValueError: If the file has no ``---`` delimited front matter or its
            ``date`` value is neither a string nor a date-like object.
        KeyError: If the front matter has no ``date`` field.
    """
    event_date_prefix = re.compile(r"\b\d{4}-\d{2}-\d{2} \d{2}:\d{2}")

    # Read the YAML front matter and the body in one pass.
    with open(input_file, 'r', encoding='utf-8') as file:
        contents = file.read()

    document = re.search(r'---(.*?)---(.*)', contents, re.DOTALL)
    if document is None:
        raise ValueError(f"{input_file}: no '---' delimited front matter found")
    data = yaml.safe_load(document.group(1))
    body = document.group(2)

    # Convert the fields.
    original_date = data['date']
    if isinstance(original_date, str):
        data['publishDate'] = date_checker(original_date)
    elif hasattr(original_date, 'isoformat'):
        # YAML yields datetime.datetime for timestamps and datetime.date for
        # plain dates; both provide isoformat(). Plain dates used to fall
        # through and end in a KeyError on 'publishDate'.
        data['publishDate'] = original_date.isoformat()
    else:
        raise ValueError(f"{input_file}: unsupported date value {original_date!r}")
    del data['date']
    data['lastmod'] = data['publishDate']

    if 'tags' in data:
        tags = data['tags']
        if tags is None or tags == []:
            del data['tags']
        elif not isinstance(tags, list):
            # "a, b c" -> ['a', 'b', 'c']; tokens that were only commas are dropped.
            stripped = (tag.strip(',') for tag in tags.split())
            data['tags'] = [tag for tag in stripped if tag]

    if 'ws' in data:
        # An empty "ws:" parses to None; treat it as an event without details.
        workshop = data.pop('ws') or {}
        data['categories'] = 'event'
        if 'where' in workshop:
            data['location'] = workshop['where']
        if 'when' in workshop:
            when = workshop['when']
            if event_date_prefix.match(str(when)):
                data['date'] = date_checker(when)
    else:
        data['categories'] = 'article'

    if 'header_image' in data:
        new_img_path = migrate_header_img_and_return_new_path_for_post(
            data.pop('header_image'), output_dir)
        data['header'] = {
            'image': new_img_path,
            'caption': 'Sorry this blog entry was migrated, there is no Alt-Text',
        }

    if 'author' in data:
        data['authors'] = data.pop('author')['display_name'].split(', ')

    # Delete fields that are not carried over.
    for obsolete in ('meta', 'published', 'status', 'layout', 'type'):
        data.pop(obsolete, None)

    data['draft'] = False

    # NOTE(review): the pattern is greedy and spans newlines, so everything
    # from the first '{' to the last '}' of the body is removed. Kept as in
    # the original; confirm this is intended rather than per-tag removal.
    new_body = re.sub(r"{.*}", "", body, flags=re.DOTALL)

    # Assemble the result from its parts. Whole-document str.replace() could
    # also rewrite other occurrences of the front-matter or body text.
    new_contents = (
        contents[:document.start()]
        + '---\n' + yaml.safe_dump(data) + '---'
        + new_body
    )

    # Write the new contents to the file.
    with open(Path(output_dir) / "index.md", 'w', encoding='utf-8') as file:
        file.write(new_contents)
|
|
|
|
|
|
|
|
|
|
|
|
def date_checker(new_date: str):
    """Convert a ``YYYY-MM-DD HH:MM[:SS][ zone]`` value to an ISO 8601 string.

    The value is converted to the local timezone of the machine running the
    script before formatting.

    Accepted zone suffixes are numeric UTC offsets (``+0100``, ``+01:00``,
    with or without a preceding space), ``Z``, ``UTC``/``GMT`` and whatever
    timezone names ``strptime``'s ``%Z`` recognises on this machine. Values
    without a zone are interpreted as local time.

    Args:
        new_date: The date as a string. A ``datetime`` instance is accepted
            as well and converted directly.

    Returns:
        The ISO 8601 representation, or ``None`` when the value does not
        start with ``YYYY-MM-DD HH:MM``.

    Raises:
        ValueError: If the value starts like a date but the remainder cannot
            be parsed.
    """
    # Local import: only ``datetime`` is imported at file level.
    from datetime import timezone

    if isinstance(new_date, datetime):
        return new_date.astimezone().isoformat()

    text = str(new_date).strip()
    if not re.match(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}", text):
        return None

    for base in ('%Y-%m-%d %H:%M:%S', '%Y-%m-%d %H:%M'):
        # %z handles numeric offsets, which %Z does not parse.
        for zone in (' %z', '%z', ' %Z', ''):
            try:
                parsed = datetime.strptime(text, base + zone)
            except ValueError:
                continue
            # %Z validates the name but returns a naive datetime; without
            # this, "UTC"/"GMT" values were treated as local time.
            if zone == ' %Z' and text.split()[-1].upper() in ('UTC', 'GMT'):
                parsed = parsed.replace(tzinfo=timezone.utc)
            return parsed.astimezone().isoformat()

    raise ValueError(f"unsupported date format: {text!r}")
|
|
|
|
|
|
|
|
|
|
|
|
def convert_year(year_path, output):
    """Convert every post found below one year directory.

    Each file matching ``*.m*`` anywhere under ``year_path`` is passed to
    ``convert_m2h``. The output directory is named after the file's parent
    directory when that name starts with ``YYYY-MM-DD-``, otherwise after the
    part of the file name before the first dot.

    Args:
        year_path: ``pathlib.Path`` of the year directory to scan.
        output: Directory (string or path) that receives one sub-directory
            per post.
    """
    # Raw string: "\d" in a normal string literal is an invalid escape.
    dated_dir = re.compile(r"\d{4}-\d{2}-\d{2}-")

    # rglob() already searches recursively, so no "**/" prefix is needed.
    # Sorting makes the processing order deterministic.
    for post in sorted(year_path.rglob('*.m*')):
        # Directories can match the pattern too; they cannot be opened as posts.
        if not post.is_file():
            continue

        if dated_dir.match(post.parent.name):
            slug = post.parent.name
        else:
            slug = post.name.split('.')[0]

        output_path = Path(output, slug)
        output_path.mkdir(parents=True, exist_ok=True)
        convert_m2h(post, str(output_path))
|
|
|
|
|
|
|
|
|
|
|
|
def handle_list_conversion(input_dir, output_dir):
    """Run ``convert_year`` for every sub-directory of ``input_dir``.

    For each directory directly below ``input_dir`` a directory of the same
    name is created below ``output_dir`` (if missing) and handed to
    ``convert_year`` together with the source directory. Plain files in
    ``input_dir`` are ignored.

    Args:
        input_dir: Directory containing one sub-directory per year.
        output_dir: Directory that receives the converted years.
    """
    for year in sorted(Path(input_dir).iterdir()):
        if not year.is_dir():
            continue

        # Join with "/" instead of string formatting, so an output_dir
        # without a trailing separator no longer yields e.g. "out2020".
        output_path = Path(output_dir) / year.name
        output_path.mkdir(parents=True, exist_ok=True)
        convert_year(year, str(output_path))
|
|
|
|
|
|
|
|
|
|
|
|
@click.command()
@click.option("--input", required=True, help="Input Path.")
@click.option("--output", required=True, help="Output Path")
def click_cli(input, output):
    """Convert all posts below --input and write them below --output.

    Both options are required; previously a missing option surfaced as a
    TypeError when the value was turned into a path.
    """
    # The image migration helper reads g_input as a module-level global, so
    # both paths are published before the conversion starts.
    global g_input, g_output
    g_input = input
    g_output = output

    handle_list_conversion(input, output)
|
|
|
|
|
|
|
|
|
|
|
|
# Start the command-line interface only when the file is run as a script.
if __name__ == "__main__":
    click_cli()
|