来源:https://stmorse.github.io/journal/notebook-converter.html
Written on April 7th, 2024 by Steven Morse
全文请查看原链接
To run this, just make sure the `converter.py` script is in the same folder as your notebook (or modify the filepath accordingly) and run:
~/yourfolder $ python3 converter.py yournotebook.ipynb
And it will create a folder called `yournotebook` containing a `yournotebook.md` file and (if applicable) a bunch of `.png` files.
You’ll probably have to do some editing of the markdown before it’s ready to publish, but I’d say this script gets you 90 percent of the way there. And, I dunno, I just like knowing exactly what the script is doing.
Hope this was helpful to you!
Here’s the full code just for good measure:
import json
import re
import base64
import os
import sys
# Matches a single-dollar math span ($...$) that is not part of a $$...$$
# block.  Lookarounds are used so that neighbouring characters are not
# consumed: spans at the very start/end of a cell and spans that directly
# follow another converted span are matched too.
_INLINE_MATH = re.compile(r'(?<!\$)\$([^\$]+)\$(?!\$)')

# Matches a run of pipes followed by one (non-newline) character.
_PIPES = re.compile(r'([\|]+)(.)')


def _as_text(value):
    """Return a notebook multi-line field as one string.

    The field may be a single string or a list of strings; list entries are
    concatenated without a separator because they already carry their own
    line breaks.
    """
    if isinstance(value, str):
        return value
    return ''.join(value)


def _convert_markdown(temp):
    """Apply the Jekyll-specific rewrites to the text of one markdown cell."""
    # replace $...$ with $$...$$ for my jekyll build; existing $$...$$ is
    # left untouched
    temp = _INLINE_MATH.sub(lambda mo: '$$' + mo.group(1) + '$$', temp)

    # TODO: this is buggy
    # my jekyll build also doesn't like |
    # replace each pipe with \vert, keeping exactly one space before the
    # character that followed the run of pipes
    temp = _PIPES.sub(
        lambda mo: r'\vert' * len(mo.group(1))
        + (' ' if mo.group(2) == ' ' else ' ' + mo.group(2)),
        temp,
    )
    return temp


def main(fname):
    """Convert a Jupyter notebook into a markdown post plus PNG images.

    Reads the notebook JSON at ``fname`` and creates a directory named after
    the notebook (its path without the extension).  Inside it, writes
    ``<name>.md`` containing a Jekyll front-matter header, every code cell as
    a fenced ``python`` block, every ``text/plain`` output as a plain fenced
    block, every markdown cell (after the ``$`` / ``|`` rewrites), and an
    ``<img>`` tag for each ``image/png`` output, which is saved alongside as
    ``image<k>.png``.

    Outputs that have no ``data`` entry are skipped, and cells whose type is
    neither ``code`` nor ``markdown`` are ignored.

    Args:
        fname: path to the ``.ipynb`` file.
    """
    # load the notebook; the context manager closes the handle again
    with open(fname, encoding='utf-8') as f:
        book = json.load(f)

    # output directory sits next to the notebook; the bare stem (without any
    # directory part) names the post, the markdown file and the image URLs
    out_dir = os.path.splitext(fname)[0]
    name = os.path.basename(out_dir)

    post_title = name
    post_date = '2023-xx-xx'
    post_tags = ['python', 'mathematics']
    post_header = (
        '---\n'
        'layout: post\n'
        'title: "{title}"\n'
        'categories: journal\n'
        'date: {date}\n'
        'tags: {tags}\n'
        '---'
    )

    md_file_name = name + '.md'
    img_embed_stem = r'https://stmorse.github.io/images/2024/' + name + '/'
    img_format = (
        '<img align="center" width="90%" '
        'src="{stem}{filename}" alt="{alttext}">'
    )

    # make subdirectory (re-running the converter must not fail)
    os.makedirs(out_dir, exist_ok=True)

    # pieces of the final document, joined once at the end
    parts = [
        post_header.format(title=post_title, date=post_date, tags=post_tags),
        '\n\n',
    ]
    img_k = 0

    for cell in book['cells']:
        # code block
        if cell['cell_type'] == 'code':
            parts.append('\n```python\n')
            parts.append(_as_text(cell['source']))
            parts.append('\n```\n\n')

            # handle text or image outputs
            for output in cell.get('outputs', []):
                data = output.get('data')
                if data is None:
                    # no MIME bundle on this output: nothing to render
                    continue

                # handle code outputs
                if 'text/plain' in data:
                    parts.append('\n```\n')
                    parts.append(_as_text(data['text/plain']))
                    parts.append('\n```\n')

                # handle images
                if 'image/png' in data:
                    # grab raw (base64) image string
                    encoded = _as_text(data['image/png'])

                    # save image
                    ifname = f'image{img_k}.png'
                    with open(os.path.join(out_dir, ifname), 'wb') as img:
                        # encode converts string->bytes, decode converts to img
                        img.write(base64.decodebytes(encoded.encode('latin-1')))
                    img_k += 1

                    # add image include to markdown text
                    parts.append('\n')
                    parts.append(img_format.format(
                        stem=img_embed_stem, filename=ifname, alttext=ifname))
                    parts.append('\n')

        # markdown block
        elif cell['cell_type'] == 'markdown':
            parts.append('\n')
            parts.append(_convert_markdown(_as_text(cell['source'])))
            parts.append('\n')

    with open(os.path.join(out_dir, md_file_name), 'w', encoding='utf-8') as f:
        f.write(''.join(parts))
if __name__ == "__main__":
    # The notebook path is the first command-line argument; without it,
    # print a usage hint instead of failing with an IndexError traceback.
    if len(sys.argv) < 2:
        sys.exit(f'usage: python3 {sys.argv[0]} yournotebook.ipynb')
    main(sys.argv[1])