zephyr/doc/scripts/extract_content.py
Marti Bolivar 0913c296cf doc: fix extract_content.py dependency tracking
The logic which copies source documentation files into the build
directory could use some improvements to its dependency management, so
that when a source file changes, extract_content.py gets re-run.

Make these changes as follows:

- Add an --outputs flag to extract_content.py, so that the
  sources it depends on and generates can be saved into a file and
  thus made known to the build system

- Change the way the sources and destination are specified in the
  extract_content.py command line so that the entire job can be done
  in a single command, rather than multiple (to avoid having to
  collate multiple --outputs files in CMake)

- Extract the content at configure time with execute_process(),
  tracking all inputs and outputs within the build system itself. Use
  this information to make sure that each individual output depends on
  just its exact input file, ensuring updated inputs produce updated
  outputs without having to call extract_content.py again.

- Ensure that the "content" build system target depends on all the
  outputs, transitively triggering a rebuild any time an input
  file (e.g. .rst documentation file or included image/source file)
  changes.

Signed-off-by: Marti Bolivar <marti@foundries.io>
2018-10-12 11:37:48 +02:00

200 lines
7.3 KiB
Python
Executable File

#!/usr/bin/env python3
#
# Copyright (c) 2018, Foundries.io Ltd
# Copyright (c) 2018, Nordic Semiconductor ASA
# Copyright (c) 2017, Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Internal script used by the documentation's build system to create
# the "final" docs tree which is then compiled by Sphinx.
#
# This works around the fact that Sphinx needs a single documentation
# root directory, while Zephyr's documentation files are spread around
# the tree.
import argparse
import collections
import fnmatch
import os
from os import path
import re
import shutil
import sys
# reStructuredText directives whose (single) argument names another file
# that must also be copied into the build tree.
DIRECTIVES = ["figure", "include", "image", "literalinclude"]

# A simple namedtuple for a generated output file.
#
# - src: source file, what file should be copied (in source directory)
# - dst: destination file, path it should be copied to (in build directory)
Output = collections.namedtuple('Output', 'src dst')

# Represents the content which must be extracted from the Zephyr tree,
# as well as the output directories needed to contain it.
#
# - outputs: list of Output objects for extracted content.
# - output_dirs: set of directories which must exist to contain
#   output destination files.
Content = collections.namedtuple('Content', 'outputs output_dirs')
def src_deps(zephyr_base, src_file, dest):
    """Return a list of Output objects for src_file's additional
    dependencies, as they should be copied into dest.

    - zephyr_base: the ZEPHYR_BASE directory containing src_file
    - src_file: path to a source file in the documentation
    - dest: path to the top-level output/destination directory

    Output paths inside dest are based on each dependency's relative
    path from zephyr_base. Only .rst files are inspected, for
    directives referencing other files (as configured in DIRECTIVES).
    """
    if not src_file.endswith(".rst"):
        return []

    # Load the file's contents, bailing on decode errors.
    try:
        with open(src_file, encoding="utf-8") as f:
            content = [x.strip() for x in f.readlines()]
    except UnicodeDecodeError as e:
        sys.stderr.write(
            "Malformed {} in {}\n"
            " Context: {}\n"
            " Problematic data: {}\n"
            " Reason: {}\n".format(
                e.encoding, src_file,
                e.object[max(e.start - 40, 0):e.end + 40],
                e.object[e.start:e.end],
                e.reason))
        return []

    # Source file's directory.
    src_dir = path.dirname(src_file)
    # Destination directory for any dependencies.
    dst_dir = path.join(dest, path.relpath(src_dir, start=zephyr_base))

    # Find directives in the content which imply additional
    # dependencies. We assume each such directive takes a single
    # argument, which is a (relative) path to the additional
    # dependency file.
    directives = "|".join(DIRECTIVES)
    # Raw string: `\.` and `\s` are invalid escape sequences in a plain
    # string literal (DeprecationWarning, SyntaxWarning on 3.12+).
    pattern = re.compile(r"\.\.\s+(?P<directive>%s)::\s+(?P<dep_rel>.*)" %
                         directives)
    deps = []
    for l in content:
        m = pattern.match(l)
        if not m:
            continue

        dep_rel = m.group('dep_rel')  # relative to src_dir
        dep_src = path.abspath(path.join(src_dir, dep_rel))
        if not path.isfile(dep_src):
            # Warn but keep going; a missing dependency shouldn't
            # abort extraction of everything else.
            print("File not found:", dep_src, "\n referenced by:",
                  src_file, file=sys.stderr)
            continue

        dep_dst = path.abspath(path.join(dst_dir, dep_rel))
        deps.append(Output(dep_src, dep_dst))

    return deps
def find_content(zephyr_base, src, dest, fnfilter, ignore):
    """Plan the extraction: return a Content with the Outputs to copy
    and the new directories needed to contain them.

    This only inspects the tree under path.join(zephyr_base, src); it
    never copies files or otherwise modifies dest.
    """
    outputs = []
    output_dirs = set()

    for dirpath, dirnames, filenames in os.walk(path.join(zephyr_base, src)):
        # Prune ignored subdirectories so the walk never descends
        # into them.
        dirnames[:] = [d for d in dirnames
                       if not path.join(dirpath, d).startswith(ignore)]

        sources = fnmatch.filter(filenames, fnfilter)
        if not sources:
            # No matching files here; nothing to plan for this directory.
            continue

        # Matching sources exist, so their mirror directory under dest
        # must be created later.
        dst_dir = path.join(dest, path.relpath(dirpath, start=zephyr_base))
        output_dirs.add(path.abspath(dst_dir))

        # Plan an Output per source file, plus any extra files the
        # source pulls in via directives -- tracking the directories
        # those dependencies land in as well.
        for fname in sources:
            src_abs = path.join(dirpath, fname)
            deps = src_deps(zephyr_base, src_abs, dest)
            output_dirs.update(path.dirname(dep.dst) for dep in deps)
            outputs.extend(deps)
            outputs.append(Output(src_abs,
                                  path.abspath(path.join(dst_dir, fname))))

    return Content(outputs, output_dirs)
def extract_content(content):
    """Materialize a Content plan: make its directories, copy its files."""
    # All destination directories must exist before any copy happens.
    for directory in content.output_dirs:
        os.makedirs(directory, exist_ok=True)

    # copy2() preserves metadata (including mtime), so an unchanged
    # source doesn't needlessly trigger a documentation rebuild.
    for out in content.outputs:
        shutil.copy2(out.src, out.dst)
def main():
    """Entry point: parse the command line, copy all requested content,
    and optionally record the input/output file list for the build
    system."""
    parser = argparse.ArgumentParser(
        description='''Recursively copy documentation files from ZEPHYR_BASE to
        a destination folder, along with files referenced in those .rst files
        by a configurable list of directives: {}. The ZEPHYR_BASE environment
        variable is used to determine source directories to copy files
        from.'''.format(DIRECTIVES))
    parser.add_argument('--outputs',
                        help='If given, save input/output files to this path')
    parser.add_argument('--ignore', action='append',
                        help='''Source directories to ignore when copying
                        files. This may be given multiple times.''')
    parser.add_argument('content_config', nargs='+',
                        help='''A glob:source:destination specification
                        for content to extract. The "glob" is a documentation
                        file name pattern to include, "source" is a source
                        directory to search for such files in, and
                        "destination" is the directory to copy it into.''')
    args = parser.parse_args()

    # All source paths are resolved relative to ZEPHYR_BASE.
    try:
        zephyr_base = os.environ["ZEPHYR_BASE"]
    except KeyError:
        sys.exit("ZEPHYR_BASE environment variable undefined.")

    # Normalize ignore paths so the startswith() pruning in
    # find_content() compares like with like. args.ignore is None when
    # the flag was never given.
    ignore = tuple(path.normpath(ign) for ign in args.ignore or ())

    # Handle every glob:source:destination job, accumulating the union
    # of all planned outputs for the optional --outputs manifest.
    outputs = set()
    for spec in args.content_config:
        fnfilter, source, dest = spec.split(':', 2)
        content = find_content(zephyr_base, source, dest, fnfilter, ignore)
        extract_content(content)
        outputs.update(content.outputs)

    if args.outputs:
        # One path per line: each src followed by its dst.
        with open(args.outputs, 'w') as f:
            for o in outputs:
                f.write(o.src + '\n')
                f.write(o.dst + '\n')
# Run only when executed as a script, so the module stays importable
# without side effects.
if __name__ == "__main__":
    main()