zephyr/doc/scripts/extract_content.py
David B. Kinder 3bdb52f1ad doc: improved error reporting for non-UTF8 docs
Improve error reporting to include the filename being processed when a
problem occurs during a pre-generation phase when boards and samples
files are temporarily copied (by doc/scripts/extract_content.py) into
the documentation area for processing.

Two recent problems were noticed:

  Some new files were using Windows-1252 encoding and included Windows
  printer quote marks and hyphens; for example, 0x92 in Windows-1252
  encoding is Unicode U+2019, 'RIGHT SINGLE QUOTATION MARK'.

  An image file referenced by a reST file was missing.

Both of these threw an exception reporting the error, but did not
include any information about the file currently being processed, making
it hard to fix the problem (e.g., change the Windows right quote
character to an ASCII ').

Signed-off-by: David B. Kinder <david.b.kinder@intel.com>
2018-06-18 19:35:21 -04:00

81 lines
2.6 KiB
Python
Executable File

#!/usr/bin/env python3
#
# Copyright (c) 2017, Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Very quick script to move docs from different places into the doc directory
# to fix the website and external links
import os
import shutil
import re
import sys
import fnmatch
# directories to search for .rst files
CONTENT_DIRS = ["samples", "boards"]
# directives to parse for included files
DIRECTIVES = ["figure", "include", "image", "literalinclude"]
# The paths used by this script are built from $ZEPHYR_BASE, so stop right
# away with a clear message when the variable is not set.
if "ZEPHYR_BASE" not in os.environ:
    sys.stderr.write("$ZEPHYR_BASE environment variable undefined.\n")
    # sys.exit() rather than the exit() helper: the latter is injected by the
    # site module for interactive use and is not guaranteed to exist.
    sys.exit(1)
ZEPHYR_BASE = os.environ["ZEPHYR_BASE"]
def get_rst_files(dir):
    """Copy the .rst files found under ``dir``, and the files they use, into doc/.

    Walks ``ZEPHYR_BASE/<dir>`` for ``*.rst`` files and copies each one to the
    same relative location under ``ZEPHYR_BASE/doc``.  Each .rst file is then
    read as UTF-8 and scanned for the directives listed in DIRECTIVES; the
    file named by every matching directive is copied next to the copied .rst
    file, keeping its relative path.

    Problems are reported on stderr together with the name of the .rst file
    being processed instead of aborting the run:

    * a referenced file that does not exist is reported and skipped;
    * an .rst file that cannot be decoded as UTF-8 is reported (with the
      offending bytes and some context) and its directives are not scanned.

    Args:
        dir: Directory to search, relative to ZEPHYR_BASE (e.g. "samples").
    """
    # The pattern depends only on DIRECTIVES, so build it once per call.  A
    # raw string keeps the regex escapes (\s, \.) from being treated as
    # (invalid) string escapes.
    pattern = re.compile(r"\s*\.\.\s+(%s)::\s+(.*)" % "|".join(DIRECTIVES))

    # Collect every match before copying anything.
    matches = []
    for root, _dirnames, filenames in os.walk('%s/%s' % (ZEPHYR_BASE, dir)):
        for filename in fnmatch.filter(filenames, '*.rst'):
            matches.append(os.path.join(root, filename))

    for file in matches:
        # relpath() strips only the leading ZEPHYR_BASE component; a plain
        # str.replace() would also remove later occurrences of that text
        # (e.g. every "." when ZEPHYR_BASE is ".").
        frel = os.path.relpath(file, ZEPHYR_BASE)
        rel_dir = os.path.dirname(frel)

        os.makedirs(os.path.join(ZEPHYR_BASE, "doc", rel_dir), exist_ok=True)
        shutil.copyfile(file, os.path.join(ZEPHYR_BASE, "doc", frel))

        try:
            # The context manager closes the file, also when decoding fails.
            with open(file, encoding="utf-8") as f:
                content = [x.strip() for x in f]
        except UnicodeDecodeError as e:
            sys.stderr.write(
                "Malformed {} in {}\n"
                " Context: {}\n"
                " Problematic data: {}\n"
                " Reason: {}\n".format(
                    e.encoding, file,
                    e.object[max(e.start - 40, 0):e.end + 40],
                    e.object[e.start:e.end],
                    e.reason))
            continue

        for line in content:
            m = pattern.match(line)
            if not m:
                continue

            # Group 2 is the directive argument: the path of the used file,
            # relative to the directory of the .rst file.
            inf = m.group(2)
            ind = os.path.dirname(inf)
            os.makedirs(os.path.join(ZEPHYR_BASE, "doc", rel_dir, ind),
                        exist_ok=True)

            try:
                shutil.copyfile(os.path.join(ZEPHYR_BASE, rel_dir, inf),
                                os.path.join(ZEPHYR_BASE, "doc", rel_dir, inf))
            except FileNotFoundError:
                sys.stderr.write("File not found: %s\n reference by %s\n" % (inf, file))
def main():
    """Run get_rst_files() on every directory listed in CONTENT_DIRS."""
    for content_dir in CONTENT_DIRS:
        get_rst_files(content_dir)


# Only do the work when executed as a script, not when imported.
if __name__ == "__main__":
    main()