2017-12-29 12:43:05 +00:00
#!/usr/bin/env python
# -*- coding: utf-8 -*-
2018-02-16 10:33:30 +00:00
# - Single-page document.
# - Requirements to the md-sources:
#   - Don't use links without anchors. This means that you cannot link to a file alone: specify an anchor at the top of the target file and link to that anchor.
#   - Anchors should be unique throughout the whole document.
# - Implementation:
#   - The script gets the list of files from the `pages` section of `mkdocs.yml`. It picks up commented-out files too, and that is intended.
#   - Files are concatenated in order, incrementing the level of headers in all files except the first one.
#   - The script converts links to other files into in-page links.
#     - Links starting with 'http' are skipped.
#     - Non-http links with an anchor are cut down to the part starting at the anchor sign (#).
#     - For non-http links without an anchor, the script logs an error and cuts them from the resulting single-page document.
2017-12-29 12:43:05 +00:00
import codecs
import sys
import re
import os
# Validate the command line: one positional argument is expected, the name of
# the language directory (for example ``ru``).
if len(sys.argv) < 2:
    # Single-argument print(...) behaves the same under Python 2 and Python 3.
    print("Usage: concatenate.py language_dir")
    print("Example: concatenate.py ru")
    sys.exit(1)

# The argument is used as a directory prefix further down, so a regular file
# with the same name has to be rejected as well (hence isdir, not exists).
if not os.path.isdir(sys.argv[1]):
    print("Pass language_dir correctly. For example, 'ru'.")
    sys.exit(2)
2018-02-21 18:44:33 +00:00
# Configuration: every path is derived from the language directory name that
# is passed as the first command-line argument (for example ``ru``).
# The literals must not contain padding spaces, otherwise the resulting file
# names would not match anything on disk.
PROJ_CONFIG = 'mkdocs_' + sys.argv[1] + '.yml'  # mkdocs config that lists the pages
SINGLE_PAGE = sys.argv[1] + '_single_page/index.md'  # output document
DOCS_DIR = sys.argv[1] + '/'  # prefix prepended to every page path
2017-12-29 12:43:05 +00:00
# 1. Open mkdocs.yml file and read `pages` configuration to get an ordered list of files
files_to_concatenate = []

# The config is scanned line by line rather than parsed as YAML, which is how
# commented-out page entries end up in the list too.
# The `with` block closes the config file once the scan is finished.
with open(PROJ_CONFIG) as cfg_file:
    for l in cfg_file:
        # A page entry mentions an .md file; entries that refer to the
        # single-page document itself are skipped.
        if '.md' in l and 'single_page' not in l:
            # Keep what follows the first colon, stripped of surrounding
            # blanks, single quotes and the line break.
            # NOTE: str.index raises ValueError for an entry without a colon.
            path = l[l.index(':') + 1:].strip(" '\n")
            files_to_concatenate.append(path)

# Single-argument print(...) behaves the same under Python 2 and Python 3.
print(str(len(files_to_concatenate)) + " files will be concatenated into single md-file.\nFiles:")
print(files_to_concatenate)
# 2. Concatenate all of the files in the list

# Matches one inline markdown link ``[text](target)``.
# Group 1 is the link text, group 2 is the link target.
# Compiled once here instead of being looked up again for every line.
LINK_RE = re.compile(r'\[(.+?)\]\((.+?)\)')


# function is passed into re.sub() to process links
def link_proc(matchObj):
    """Return the replacement text for one markdown link.

    ``matchObj`` is a match produced by ``LINK_RE``.

    * A target starting with ``http`` is returned unchanged.
    * Any other target containing ``#`` is cut down to the part starting at
      ``#``, which turns it into an in-page link.
    * Any other target without ``#`` is reported on stdout and replaced by an
      empty string, i.e. the link is removed from the output.

    The error message reads the module-level ``path`` variable, which holds
    the name of the file being processed while the loop below is running.
    """
    # Capture groups are used instead of strip('[)') + split(']('), which
    # dropped extra brackets from the text and raised ValueError when the
    # matched span contained more than one '](' sequence.
    text = matchObj.group(1)
    link = matchObj.group(2)

    if link.startswith('http'):
        return '[' + text + '](' + link + ')'

    sharp_pos = link.find('#')
    if sharp_pos > -1:
        return '[' + text + '](' + link[sharp_pos:] + ')'

    print('ERROR: Link [' + text + '](' + link + ') in file ' + path + ' has no anchor. Please provide it.')
    # Return an explicit empty string so the link is removed from the output.
    return ''


# Both the output file and every input file are closed by their `with` block.
with open(SINGLE_PAGE, 'w') as single_page_file:
    first_file = True

    for path in files_to_concatenate:
        # Blank lines keep the content of adjacent files apart.
        single_page_file.write('\n\n')

        with open(DOCS_DIR + path) as md_file:
            for l in md_file:
                # Processing links in a string
                l = LINK_RE.sub(link_proc, l)

                # Correcting headers levels: headers are pushed one level
                # down in every file except the first one.
                if not first_file and l.startswith('#'):
                    l = '#' + l

                single_page_file.write(l)

        # Cleared only after the whole first file has been written, so that
        # all of its headers keep their level (not just its first line).
        first_file = False