zephyr/doc/scripts/extract_content.py
Marti Bolivar d7dc113783 doc: avoid copying content twice
In a clean build, Ninja is creating copies of the extracted
documentation outputs regardless of whether they already exist or
not. I can't explain this behavior from the documentation of the build
command, but it's so, even if extract_content.py is patched to use
copyfile() instead of copy2() so that outputs (copied .rst files in
the build directory) have strictly newer mtimes than inputs (source
tree .rst files).

This extra copying doesn't happen for incremental builds, for some
reason.

To account for this behavior, add and use an extract_content.py option
that skips content extraction completely and leaves that all up to the
add_custom_command() which tracks each individual input->output build
command.

Reported-by: Carles Cufi <carles.cufi@nordicsemi.no>
Signed-off-by: Marti Bolivar <marti@foundries.io>
2018-10-16 15:10:18 -04:00

207 lines
7.7 KiB
Python
Executable File

#!/usr/bin/env python3
#
# Copyright (c) 2018, Foundries.io Ltd
# Copyright (c) 2018, Nordic Semiconductor ASA
# Copyright (c) 2017, Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Internal script used by the documentation's build system to create
# the "final" docs tree which is then compiled by Sphinx.
#
# This works around the fact that Sphinx needs a single documentation
# root directory, while Zephyr's documentation files are spread around
# the tree.
import argparse
import collections
import fnmatch
import os
from os import path
import re
import shutil
import sys
# directives to parse for included files
DIRECTIVES = ["figure", "include", "image", "literalinclude"]
# A simple namedtuple for a generated output file.
#
# - src: source file, what file should be copied (in source directory)
# - dst: destination file, path it should be copied to (in build directory)
Output = collections.namedtuple('Output', 'src dst')
# Represents the content which must be extracted from the Zephyr tree,
# as well as the output directories needed to contain it.
#
# - outputs: list of Output objects for extracted content.
# - output_dirs: set of directories which must exist to contain
# output destination files.
Content = collections.namedtuple('Content', 'outputs output_dirs')
def src_deps(zephyr_base, src_file, dest):
# - zephyr_base: the ZEPHYR_BASE directory containing src_file
# - src_file: path to a source file in the documentation
# - dest: path to the top-level output/destination directory
#
# Return a list of Output objects which contain src_file's
# additional dependencies, as they should be copied into
# dest. Output paths inside dest are based on each
# dependency's relative path from zephyr_base.
# Inspect only .rst files for directives referencing other files
# we'll need to copy (as configured in the DIRECTIVES variable)
if not src_file.endswith(".rst"):
return []
# Load the file's contents, bailing on decode errors.
try:
with open(src_file, encoding="utf-8") as f:
content = [x.strip() for x in f.readlines()]
except UnicodeDecodeError as e:
sys.stderr.write(
"Malformed {} in {}\n"
" Context: {}\n"
" Problematic data: {}\n"
" Reason: {}\n".format(
e.encoding, src_file,
e.object[max(e.start - 40, 0):e.end + 40],
e.object[e.start:e.end],
e.reason))
return []
# Source file's directory.
src_dir = path.dirname(src_file)
# Destination directory for any dependencies.
dst_dir = path.join(dest, path.relpath(src_dir, start=zephyr_base))
# Find directives in the content which imply additional
# dependencies. We assume each such directive takes a single
# argument, which is a (relative) path to the additional
# dependency file.
directives = "|".join(DIRECTIVES)
pattern = re.compile("\.\.\s+(?P<directive>%s)::\s+(?P<dep_rel>.*)" %
directives)
deps = []
for l in content:
m = pattern.match(l)
if not m:
continue
dep_rel = m.group('dep_rel') # relative to src_dir
dep_src = path.abspath(path.join(src_dir, dep_rel))
if not path.isfile(dep_src):
print("File not found:", dep_src, "\n referenced by:",
src_file, file=sys.stderr)
continue
dep_dst = path.abspath(path.join(dst_dir, dep_rel))
deps.append(Output(dep_src, dep_dst))
return deps
def find_content(zephyr_base, src, dest, fnfilter, ignore):
# Create a list of Outputs to copy over, and new directories we
# might need to make to contain them. Don't copy any files or
# otherwise modify dest.
outputs = []
output_dirs = set()
for dirpath, dirnames, filenames in os.walk(path.join(zephyr_base, src)):
# Limit the rest of the walk to subdirectories that aren't ignored.
dirnames[:] = [d for d in dirnames if not
path.join(dirpath, d).startswith(ignore)]
# If the current directory contains no matching files, keep going.
sources = fnmatch.filter(filenames, fnfilter)
if not sources:
continue
# There are sources here; track that the output directory
# needs to exist.
dst_dir = path.join(dest, path.relpath(dirpath, start=zephyr_base))
output_dirs.add(path.abspath(dst_dir))
# Initialize an Output for each source file, as well as any of
# that file's additional dependencies. Make sure output
# directories for dependencies are tracked too.
for src_rel in sources:
src_abs = path.join(dirpath, src_rel)
deps = src_deps(zephyr_base, src_abs, dest)
for depdir in (path.dirname(d.dst) for d in deps):
output_dirs.add(depdir)
outputs.extend(deps)
outputs.append(Output(src_abs,
path.abspath(path.join(dst_dir, src_rel))))
return Content(outputs, output_dirs)
def extract_content(content):
# Ensure each output subdirectory exists.
for d in content.output_dirs:
os.makedirs(d, exist_ok=True)
# Create each output file. Use copy2() to avoid updating
# modification times unnecessarily, as this triggers documentation
# rebuilds.
for output in content.outputs:
shutil.copy2(output.src, output.dst)
def main():
parser = argparse.ArgumentParser(
description='''Recursively copy documentation files from ZEPHYR_BASE to
a destination folder, along with files referenced in those .rst files
by a configurable list of directives: {}. The ZEPHYR_BASE environment
variable is used to determine source directories to copy files
from.'''.format(DIRECTIVES))
parser.add_argument('--outputs',
help='If given, save input/output files to this path')
parser.add_argument('--just-outputs', action='store_true',
help='''Skip extraction and just list outputs.
Cannot be given without --outputs.''')
parser.add_argument('--ignore', action='append',
help='''Source directories to ignore when copying
files. This may be given multiple times.''')
parser.add_argument('content_config', nargs='+',
help='''A glob:source:destination specification
for content to extract. The "glob" is a documentation
file name pattern to include, "source" is a source
directory to search for such files in, and
"destination" is the directory to copy it into.''')
args = parser.parse_args()
if "ZEPHYR_BASE" not in os.environ:
sys.exit("ZEPHYR_BASE environment variable undefined.")
zephyr_base = os.environ["ZEPHYR_BASE"]
if not args.ignore:
ignore = ()
else:
ignore = tuple(path.normpath(ign) for ign in args.ignore)
if args.just_outputs and not args.outputs:
sys.exit('--just-outputs cannot be given without --outputs')
content_config = [cfg.split(':', 2) for cfg in args.content_config]
outputs = set()
for fnfilter, source, dest in content_config:
content = find_content(zephyr_base, source, dest, fnfilter, ignore)
if not args.just_outputs:
extract_content(content)
outputs |= set(content.outputs)
if args.outputs:
with open(args.outputs, 'w') as f:
for o in outputs:
print(o.src, file=f, end='\n')
print(o.dst, file=f, end='\n')
if __name__ == "__main__":
main()