【转载】Jupyter Notebook转Markdown简易脚本

来源：https://stmorse.github.io/journal/notebook-converter.html

Written on April 7th, 2024 by Steven Morse

全文请查看原链接

To run this, make sure the converter.py script is in the same folder as your notebook (or adjust the file path accordingly) and run:

~/yourfolder $ python3 converter.py yournotebook.ipynb

And it will create a folder called yournotebook with a yournotebook.md file and (if applicable) a bunch of .png files.

You’ll probably have to do some editing of the markdown before it’s ready to publish, but I’d say this script gets you 90 percent of the way there. And, I dunno, I just like knowing exactly what the script is doing.

Hope this was helpful to you!

Here’s the full code just for good measure:

import json
import re
import base64
import os
import sys

def _join_lines(value):
    """Return a notebook multi-line field as one string.

    The notebook JSON stores multi-line text either as a single string or as
    a list of lines that already end with their own newline, so the pieces
    are concatenated without adding a separator.
    """
    if isinstance(value, str):
        return value
    return ''.join(value)


def _convert_markdown(source):
    """Apply the Jekyll-specific rewrites to the text of one markdown cell."""
    # replace $...$ with $$...$$ for my jekyll build :|
    # The lookarounds skip anything that is already $$...$$ without consuming
    # the neighbouring characters, so math at the very start/end of the cell
    # and back-to-back spans such as "$a$ $b$" are converted as well.
    source = re.sub(r'(?<!\$)\$([^\$]+)\$(?!\$)', r'$$\1$$', source)

    # TODO: this is still blunt -- it rewrites every |, including the ones
    # that delimit markdown tables.
    # my jekyll build also doesn't like |, replace each one with \vert
    # followed by a single space. An optional following space is swallowed so
    # the output never gains a second one; not requiring a following
    # character means a | at the end of a line is replaced too.
    source = re.sub(
        r'(\|+) ?',
        lambda mo: r'\vert' * len(mo.group(1)) + ' ',
        source
    )
    return source


def main(fname):
    """Convert the Jupyter notebook at *fname* into a Jekyll markdown post.

    A folder named after the notebook (its path without the extension) is
    created if needed. It receives ``<name>.md`` plus one ``image<k>.png``
    per PNG output found in the notebook, numbered in order of appearance.

    Code cells become fenced ``python`` blocks, followed by their
    ``text/plain`` and ``image/png`` outputs. Outputs that carry no ``data``
    bundle are skipped. Markdown cells are copied through after the
    Jekyll-specific rewrites in ``_convert_markdown``. Other cell types are
    ignored.

    Args:
        fname: Path to the ``.ipynb`` file.

    Raises:
        OSError: If the notebook cannot be read or the output cannot be
            written.
        json.JSONDecodeError: If the notebook is not valid JSON.
    """
    # Notebook files are UTF-8 JSON; be explicit instead of relying on the
    # platform default, and close the handle once parsed.
    with open(fname, encoding='utf-8') as f:
        book = json.load(f)

    # Drop the extension by name rather than slicing off six characters, and
    # keep the directory part out of the post name so a notebook given with
    # a path still produces <dir>/<name>/<name>.md.
    out_dir = os.path.splitext(fname)[0]
    name = os.path.basename(out_dir)

    post_title = name
    post_date = '2023-xx-xx'
    post_tags = ['python', 'mathematics']

    post_header = (
        '---\n'
        'layout: post\n'
        'title: "{title}"\n'
        'categories: journal\n'
        'date: {date}\n'
        'tags: {tags}\n'
        '---'
    )

    md_file_name = name + '.md'

    img_embed_stem = r'https://stmorse.github.io/images/2024/' + name + '/'
    img_format = (
        '<img align="center" width="90%" '
        '    src="{stem}{filename}" alt="{alttext}">'
    )

    # make subdirectory; re-running the converter reuses an existing one
    os.makedirs(out_dir, exist_ok=True)

    parts = [
        post_header.format(title=post_title, date=post_date, tags=post_tags),
        '\n\n',
    ]
    img_k = 0

    for cell in book['cells']:
        # code block
        if cell['cell_type'] == 'code':
            parts.append('\n```python\n')
            parts.append(_join_lines(cell['source']))
            parts.append('\n```\n\n')

            # handle text or image outputs
            for o in cell.get('outputs', []):
                data = o.get('data')
                if not data:
                    # no MIME bundle on this output -- nothing to render
                    continue

                # handle code outputs
                if 'text/plain' in data:
                    parts.append('\n```\n')
                    parts.append(_join_lines(data['text/plain']))
                    parts.append('\n```\n')

                # handle images
                if 'image/png' in data:
                    # grab raw base64 image string
                    s = _join_lines(data['image/png'])

                    # save image
                    ifname = f'image{img_k}.png'
                    with open(os.path.join(out_dir, ifname), 'wb') as f:
                        # encode converts string->bytes, decode converts to img
                        f.write(base64.decodebytes(s.encode('latin-1')))
                    img_k += 1

                    # add image include to markdown text
                    parts.append('\n')
                    parts.append(img_format.format(
                        stem=img_embed_stem, filename=ifname, alttext=ifname))
                    parts.append('\n')

        # markdown block
        elif cell['cell_type'] == 'markdown':
            parts.append('\n')
            parts.append(_convert_markdown(_join_lines(cell['source'])))
            parts.append('\n')

    with open(os.path.join(out_dir, md_file_name), 'w', encoding='utf-8') as f:
        f.write(''.join(parts))

if __name__ == "__main__":
    # Exit with a usage message instead of an IndexError traceback when the
    # notebook path is missing.
    if len(sys.argv) < 2:
        sys.exit(f'usage: python3 {sys.argv[0]} yournotebook.ipynb')
    main(sys.argv[1])

相关

发送评论编辑评论

相关

发送评论 编辑评论

发送评论编辑评论