#!/usr/bin/env python
# -*- coding: utf-8 -*-

# - Single-page document.
#   - Requirements for the md-sources:
#     - Don't use links without anchors. This means that you cannot just link
#       to a file. You should specify an anchor at the top of the file and
#       then link to this anchor.
#     - Anchors should be unique through the whole document.
#   - Implementation:
#     - The script gets the list of files from the project config
#       `mkdocs_<language_dir>.yml`: every line that mentions a `.md` file is
#       taken (except `single_page` entries). Commented-out entries are picked
#       up too, and that is intended.
#     - Files are concatenated in order, incrementing the level of headers in
#       all files except the first one.
#     - The script converts links to other files into in-page links.
#       - Links starting with 'http://' or 'https://' are left untouched.
#       - Other links with an anchor are cut down to the anchor part (#...).
#       - For other links without an anchor the script logs an error and cuts
#         them from the resulting single-page document.

import codecs
import io
import os
import re
import sys

# Markdown inline link: group 1 is the link text, group 2 is the target.
LINK_PATTERN = re.compile(r'\[(.+?)\]\((.+?)\)')


def parse_page_path(config_line):
    """Extract the markdown path mentioned on one line of the mkdocs config.

    Returns the path (text after the first ':' with surrounding spaces and
    single quotes removed), or None when the line does not mention a `.md`
    file or refers to the generated `single_page` document.
    """
    if '.md' not in config_line or 'single_page' in config_line:
        return None
    _, colon, value = config_line.partition(':')
    if not colon:
        # Untitled entry such as "- 'page.md'" (possibly commented out):
        # there is no "title: path" pair, so the list item itself is the path.
        value = config_line.strip().lstrip('#- \t')
    return value.strip(" '\n")


def read_page_paths(config_path):
    """Return the ordered list of markdown paths listed in *config_path*."""
    with io.open(config_path, encoding='utf-8') as cfg_file:
        candidates = (parse_page_path(line) for line in cfg_file)
        return [path for path in candidates if path is not None]


def rewrite_links(line, path):
    """Turn cross-file links in *line* into in-page anchor links.

    *path* is the file the line came from; it is only used in error messages.
    External (http/https) links are kept as they are, links with an anchor
    are reduced to the anchor, and links without an anchor are reported and
    removed from the line.
    """
    def replace(match):
        text, link = match.group(1), match.group(2)
        # Require the scheme separator so that relative links to files whose
        # names merely start with "http" are still rewritten.
        if link.startswith(('http://', 'https://')):
            return match.group()
        sharp_pos = link.find('#')
        if sharp_pos > -1:
            return '[' + text + '](' + link[sharp_pos:] + ')'
        print('ERROR: Link [' + text + '](' + link + ') in file ' + path +
              ' has no anchor. Please provide it.')
        # Explicitly drop the link, as described in the header comment.
        return ''

    return LINK_PATTERN.sub(replace, line)


def convert_page(lines, path, is_first_file):
    """Yield the lines of one page prepared for the single-page document.

    Links are rewritten on every line. Header lines (starting with '#') get
    one extra '#' unless the page is the first file of the document.
    NOTE: any line starting with '#' is treated as a header, including such
    lines inside fenced code blocks.
    """
    for line in lines:
        line = rewrite_links(line, path)
        if not is_first_file and line.startswith('#'):
            line = '#' + line
        yield line


def concatenate(files_to_concatenate, docs_dir, single_page):
    """Write all pages, in order, into the *single_page* file.

    Each page is read from *docs_dir* + path and is preceded by a blank-line
    separator in the output.
    """
    with io.open(single_page, 'w', encoding='utf-8') as single_page_file:
        for index, path in enumerate(files_to_concatenate):
            single_page_file.write(u'\n\n')
            with io.open(docs_dir + path, encoding='utf-8') as page:
                for line in convert_page(page, path, index == 0):
                    single_page_file.write(line)


def main(argv=None):
    """Command-line entry point: `concatenate.py language_dir`.

    Exits with status 1 when the argument is missing and with status 2 when
    the language directory does not exist.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        print("Usage: concatenate.py language_dir")
        print("Example: concatenate.py ru")
        sys.exit(1)

    language_dir = argv[1]
    if not os.path.exists(language_dir):
        print("Pass language_dir correctly. For example, 'ru'.")
        sys.exit(2)

    # Configuration
    proj_config = 'mkdocs_' + language_dir + '.yml'
    single_page = language_dir + '_single_page/index.md'
    docs_dir = language_dir + '/'

    # 1. Open mkdocs.yml file and read `pages` configuration to get an
    #    ordered list of files
    files_to_concatenate = read_page_paths(proj_config)
    print(str(len(files_to_concatenate)) +
          " files will be concatenated into single md-file.\nFiles:")
    print(files_to_concatenate)

    # 2. Concatenate all of the files in the list
    concatenate(files_to_concatenate, docs_dir, single_page)


if __name__ == '__main__':
    main()