docs: Move the python libraries to tools/lib/python

"scripts/lib" was always a bit of an awkward place for Python modules.  We
already have tools/lib; create a tools/lib/python, move the libraries
there, and update the users accordingly.

While at it, move the contents of tools/docs/lib.  Rather than make another
directory, just put these documentation-oriented modules under "kdoc".

Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Message-ID: <20251110220430.726665-2-corbet@lwn.net>
Author: Jonathan Corbet <corbet@lwn.net>
Date: 2025-11-10 15:04:29 -07:00
parent f690e07859
commit 778b8ebe51
28 changed files with 29 additions and 19 deletions
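To illustrate the "update the users accordingly" part: a consumer script under scripts/ or tools/ now needs tools/lib/python on its module path. A minimal sketch (the path computation is hypothetical; each caller derives it from its own location):

import os
import sys

# Assume this script lives directly under scripts/ in the kernel tree
SRCTREE = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, os.path.join(SRCTREE, "tools", "lib", "python"))

from jobserver import JobserverExec   # formerly found under scripts/lib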


@@ -0,0 +1,628 @@
#!/usr/bin/env python3
# pylint: disable=R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,C0302
# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
# SPDX-License-Identifier: GPL-2.0
"""
Parse ABI documentation and produce results from it.
"""
from argparse import Namespace
import logging
import os
import re
from pprint import pformat
from random import randrange, seed
# Import Python modules
from helpers import AbiDebug, ABI_DIR
class AbiParser:
"""Main class to parse ABI files"""
TAGS = r"(what|where|date|kernelversion|contact|description|users)"
XREF = r"(?:^|\s|\()(\/(?:sys|config|proc|dev|kvd)\/[^,.:;\)\s]+)(?:[,.:;\)\s]|\Z)"
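# For example (illustrative), in a description line such as
#   "Please see /sys/class/leds/red/brightness for details."
# XREF captures "/sys/class/leds/red/brightness", which may later be
# turned into a cross-reference.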
def __init__(self, directory, logger=None,
enable_lineno=False, show_warnings=True, debug=0):
"""Stores arguments for the class and initialize class vars"""
self.directory = directory
self.enable_lineno = enable_lineno
self.show_warnings = show_warnings
self.debug = debug
if not logger:
self.log = logging.getLogger("get_abi")
else:
self.log = logger
self.data = {}
self.what_symbols = {}
self.file_refs = {}
self.what_refs = {}
# Ignore files with the following suffixes
self.ignore_suffixes = (".rej", ".org", ".orig", ".bak", "~")
# Regular expressions used by the parser
self.re_abi_dir = re.compile(r"(.*)" + ABI_DIR)
self.re_tag = re.compile(r"(\S+)(:\s*)(.*)", re.I)
self.re_valid = re.compile(self.TAGS)
self.re_start_spc = re.compile(r"(\s*)(\S.*)")
self.re_whitespace = re.compile(r"^\s+")
# Regular expressions used when printing output
self.re_what = re.compile(r"(\/?(?:[\w\-]+\/?){1,2})")
self.re_escape = re.compile(r"([\.\x01-\x08\x0e-\x1f\x21-\x2f\x3a-\x40\x7b-\xff])")
self.re_unprintable = re.compile(r"([\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\xff]+)")
self.re_title_mark = re.compile(r"\n[\-\*\=\^\~]+\n")
self.re_doc = re.compile(r"Documentation/(?!devicetree)(\S+)\.rst")
self.re_abi = re.compile(r"(Documentation/ABI/)([\w\/\-]+)")
self.re_xref_node = re.compile(self.XREF)
def warn(self, fdata, msg, extra=None):
"""Displays a parse error if warning is enabled"""
if not self.show_warnings:
return
msg = f"{fdata.fname}:{fdata.ln}: {msg}"
if extra:
msg += "\n\t\t" + extra
self.log.warning(msg)
def add_symbol(self, what, fname, ln=None, xref=None):
"""Create a reference table describing where each 'what' is located"""
if what not in self.what_symbols:
self.what_symbols[what] = {"file": {}}
if fname not in self.what_symbols[what]["file"]:
self.what_symbols[what]["file"][fname] = []
if ln and ln not in self.what_symbols[what]["file"][fname]:
self.what_symbols[what]["file"][fname].append(ln)
if xref:
self.what_symbols[what]["xref"] = xref
def _parse_line(self, fdata, line):
"""Parse a single line of an ABI file"""
new_what = False
new_tag = False
content = None
match = self.re_tag.match(line)
if match:
new = match.group(1).lower()
sep = match.group(2)
content = match.group(3)
match = self.re_valid.search(new)
if match:
new_tag = match.group(1)
else:
if fdata.tag == "description":
# New "tag" is actually part of description.
# Don't consider it a tag
new_tag = False
elif fdata.tag != "":
self.warn(fdata, f"tag '{fdata.tag}' is invalid", line)
if new_tag:
# "where" is Invalid, but was a common mistake. Warn if found
if new_tag == "where":
self.warn(fdata, "tag 'Where' is invalid. Should be 'What:' instead")
new_tag = "what"
if new_tag == "what":
fdata.space = None
if content not in self.what_symbols:
self.add_symbol(what=content, fname=fdata.fname, ln=fdata.ln)
if fdata.tag == "what":
fdata.what.append(content.strip("\n"))
else:
if fdata.key:
if "description" not in self.data.get(fdata.key, {}):
self.warn(fdata, f"{fdata.key} doesn't have a description")
for w in fdata.what:
self.add_symbol(what=w, fname=fdata.fname,
ln=fdata.what_ln, xref=fdata.key)
fdata.label = content
new_what = True
key = "abi_" + content.lower()
fdata.key = self.re_unprintable.sub("_", key).strip("_")
# Avoid duplicated keys, but use a fixed seed, so that the
# namespace remains identical as long as the ABI symbols
# don't change
seed(42)
while fdata.key in self.data:
char = randrange(0, 51) + ord("A")
if char > ord("Z"):
char += ord("a") - ord("Z") - 1
fdata.key += chr(char)
if fdata.key and fdata.key not in self.data:
self.data[fdata.key] = {
"what": [content],
"file": [fdata.file_ref],
"path": fdata.ftype,
"line_no": fdata.ln,
}
fdata.what = self.data[fdata.key]["what"]
self.what_refs[content] = fdata.key
fdata.tag = new_tag
fdata.what_ln = fdata.ln
if fdata.nametag["what"]:
t = (content, fdata.key)
if t not in fdata.nametag["symbols"]:
fdata.nametag["symbols"].append(t)
return
if fdata.tag and new_tag:
fdata.tag = new_tag
if new_what:
fdata.label = ""
if "description" in self.data[fdata.key]:
self.data[fdata.key]["description"] += "\n\n"
if fdata.file_ref not in self.data[fdata.key]["file"]:
self.data[fdata.key]["file"].append(fdata.file_ref)
if self.debug == AbiDebug.WHAT_PARSING:
self.log.debug("what: %s", fdata.what)
if not fdata.what:
self.warn(fdata, "'What:' should come first:", line)
return
if new_tag == "description":
fdata.space = None
if content:
sep = sep.replace(":", " ")
c = " " * len(new_tag) + sep + content
c = c.expandtabs()
match = self.re_start_spc.match(c)
if match:
# Preserve initial spaces for the first line
fdata.space = match.group(1)
content = match.group(2) + "\n"
self.data[fdata.key][fdata.tag] = content
return
# Store any content that appears before the tags in the database
if not fdata.tag and "what" in fdata.nametag:
fdata.nametag["description"] += line
return
if fdata.tag == "description":
content = line.expandtabs()
if self.re_whitespace.sub("", content) == "":
self.data[fdata.key][fdata.tag] += "\n"
return
if fdata.space is None:
match = self.re_start_spc.match(content)
if match:
# Preserve initial spaces for the first line
fdata.space = match.group(1)
content = match.group(2) + "\n"
else:
if content.startswith(fdata.space):
content = content[len(fdata.space):]
else:
fdata.space = ""
if fdata.tag == "what":
w = content.strip("\n")
if w:
self.data[fdata.key][fdata.tag].append(w)
else:
self.data[fdata.key][fdata.tag] += content
return
content = line.strip()
if fdata.tag:
if fdata.tag == "what":
w = content.strip("\n")
if w:
self.data[fdata.key][fdata.tag].append(w)
else:
self.data[fdata.key][fdata.tag] += "\n" + content.rstrip("\n")
return
# Everything else is an error
if content:
self.warn(fdata, "Unexpected content", line)
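# As a reminder, the entries parsed above typically look like this
# (illustrative sample in the Documentation/ABI format):
#
#   What:          /sys/class/leds/<led>/brightness
#   Date:          March 2006
#   KernelVersion: 2.6.17
#   Contact:       Richard Purdie <rpurdie@rpsys.net>
#   Description:
#                  Set the brightness of the LED.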
def parse_readme(self, nametag, fname):
"""Parse ABI README file"""
nametag["what"] = ["Introduction"]
nametag["path"] = "README"
with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
for line in fp:
match = self.re_tag.match(line)
if match:
new = match.group(1).lower()
match = self.re_valid.search(new)
if match:
nametag["description"] += "\n:" + line
continue
nametag["description"] += line
def parse_file(self, fname, path, basename):
"""Parse a single file"""
ref = f"abi_file_{path}_{basename}"
ref = self.re_unprintable.sub("_", ref).strip("_")
# Store per-file state into a namespace variable. This will be used
# by the per-line parser state machine and by the warning function.
fdata = Namespace()
fdata.fname = fname
fdata.name = basename
pos = fname.find(ABI_DIR)
if pos > 0:
f = fname[pos:]
else:
f = fname
fdata.file_ref = (f, ref)
self.file_refs[f] = ref
fdata.ln = 0
fdata.what_ln = 0
fdata.tag = ""
fdata.label = ""
fdata.what = []
fdata.key = None
fdata.xrefs = None
fdata.space = None
fdata.ftype = path.split("/")[0]
fdata.nametag = {}
fdata.nametag["what"] = [f"ABI file {path}/{basename}"]
fdata.nametag["type"] = "File"
fdata.nametag["path"] = fdata.ftype
fdata.nametag["file"] = [fdata.file_ref]
fdata.nametag["line_no"] = 1
fdata.nametag["description"] = ""
fdata.nametag["symbols"] = []
self.data[ref] = fdata.nametag
if self.debug & AbiDebug.WHAT_OPEN:
self.log.debug("Opening file %s", fname)
if basename == "README":
self.parse_readme(fdata.nametag, fname)
return
with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
for line in fp:
fdata.ln += 1
self._parse_line(fdata, line)
if "description" in fdata.nametag:
fdata.nametag["description"] = fdata.nametag["description"].lstrip("\n")
if fdata.key:
if "description" not in self.data.get(fdata.key, {}):
self.warn(fdata, f"{fdata.key} doesn't have a description")
for w in fdata.what:
self.add_symbol(what=w, fname=fname, xref=fdata.key)
def _parse_abi(self, root=None):
"""Internal function to parse documentation ABI recursively"""
if not root:
root = self.directory
with os.scandir(root) as obj:
for entry in obj:
name = os.path.join(root, entry.name)
if entry.is_dir():
self._parse_abi(name)
continue
if not entry.is_file():
continue
basename = os.path.basename(name)
if basename.startswith("."):
continue
if basename.endswith(self.ignore_suffixes):
continue
path = self.re_abi_dir.sub("", os.path.dirname(name))
self.parse_file(name, path, basename)
def parse_abi(self, root=None):
"""Parse documentation ABI"""
self._parse_abi(root)
if self.debug & AbiDebug.DUMP_ABI_STRUCTS:
self.log.debug(pformat(self.data))
def desc_txt(self, desc):
"""Print description as found inside ABI files"""
desc = desc.strip(" \t\n")
return desc + "\n\n"
def xref(self, fname):
"""
Converts a Documentation/ABI + basename into a ReST cross-reference
"""
return self.file_refs.get(fname)
def desc_rst(self, desc):
"""Enrich ReST output by creating cross-references"""
# Remove title markups from the description.
# Titles inside ABI files would only work if extra care were
# taken to strictly follow the same level order for each
# markup.
desc = self.re_title_mark.sub("\n\n", "\n" + desc)
desc = desc.rstrip(" \t\n").lstrip("\n")
# Python's regex performance for non-compiled expressions is a lot
# worse than Perl's, as Perl automatically caches them at their
# first usage. Here, we need to do the same, as otherwise the
# performance penalty would be high
new_desc = ""
for d in desc.split("\n"):
if d == "":
new_desc += "\n"
continue
# Use cross-references for doc files where needed
d = self.re_doc.sub(r":doc:`/\1`", d)
# Use cross-references for ABI generated docs where needed
matches = self.re_abi.findall(d)
for m in matches:
abi = m[0] + m[1]
xref = self.file_refs.get(abi)
if not xref:
# This may happen if the ABI file is in a separate directory,
# e.g. when parsing ABI/testing while the symbol is under
# ABI/stable. The proper solution is to move this part of the
# code to be inside sphinx/kernel_abi.py
self.log.info("Didn't find ABI reference for '%s'", abi)
else:
new = self.re_escape.sub(r"\\\1", m[1])
d = re.sub(fr"\b{abi}\b", f":ref:`{new} <{xref}>`", d)
# Seek for cross-reference symbols like /sys/...
# Need to be careful to avoid doing it inside a code block
if d[0] not in [" ", "\t"]:
matches = self.re_xref_node.findall(d)
for m in matches:
# Finding ABI here is more complex due to wildcards
xref = self.what_refs.get(m)
if xref:
new = self.re_escape.sub(r"\\\1", m)
d = re.sub(fr"\b{m}\b", f":ref:`{new} <{xref}>`", d)
new_desc += d + "\n"
return new_desc + "\n\n"
def doc(self, output_in_txt=False, show_symbols=True, show_file=True,
filter_path=None):
"""Print ABI at stdout"""
part = None
for key, v in sorted(self.data.items(),
key=lambda x: (x[1].get("type", ""),
x[1].get("what"))):
wtype = v.get("type", "Symbol")
file_ref = v.get("file")
names = v.get("what", [""])
if wtype == "File":
if not show_file:
continue
else:
if not show_symbols:
continue
if filter_path:
if v.get("path") != filter_path:
continue
msg = ""
if wtype != "File":
cur_part = names[0]
if cur_part.find("/") >= 0:
match = self.re_what.match(cur_part)
if match:
symbol = match.group(1).rstrip("/")
cur_part = "Symbols under " + symbol
if cur_part and cur_part != part:
part = cur_part
msg += part + "\n"+ "-" * len(part) +"\n\n"
msg += f".. _{key}:\n\n"
max_len = 0
for i in range(0, len(names)): # pylint: disable=C0200
names[i] = "**" + self.re_escape.sub(r"\\\1", names[i]) + "**"
max_len = max(max_len, len(names[i]))
msg += "+-" + "-" * max_len + "-+\n"
for name in names:
msg += f"| {name}" + " " * (max_len - len(name)) + " |\n"
msg += "+-" + "-" * max_len + "-+\n"
msg += "\n"
for ref in file_ref:
if wtype == "File":
msg += f".. _{ref[1]}:\n\n"
else:
base = os.path.basename(ref[0])
msg += f"Defined on file :ref:`{base} <{ref[1]}>`\n\n"
if wtype == "File":
msg += names[0] +"\n" + "-" * len(names[0]) +"\n\n"
desc = v.get("description")
if not desc and wtype != "File":
msg += f"DESCRIPTION MISSING for {names[0]}\n\n"
if desc:
if output_in_txt:
msg += self.desc_txt(desc)
else:
msg += self.desc_rst(desc)
symbols = v.get("symbols")
if symbols:
msg += "Has the following ABI:\n\n"
for w, label in symbols:
# Escape special chars from content
content = self.re_escape.sub(r"\\\1", w)
msg += f"- :ref:`{content} <{label}>`\n\n"
users = v.get("users")
if users and users.strip(" \t\n"):
users = users.strip("\n").replace('\n', '\n\t')
msg += f"Users:\n\t{users}\n\n"
ln = v.get("line_no", 1)
yield (msg, file_ref[0][0], ln)
def check_issues(self):
"""Warn about duplicated ABI entries"""
for what, v in self.what_symbols.items():
files = v.get("file")
if not files:
# Should never happen if the parser works properly
self.log.warning("%s doesn't have a file associated", what)
continue
if len(files) == 1:
continue
f = []
for fname, lines in sorted(files.items()):
if not lines:
f.append(f"{fname}")
elif len(lines) == 1:
f.append(f"{fname}:{lines[0]}")
else:
m = fname + "lines "
m += ", ".join(str(x) for x in lines)
f.append(m)
self.log.warning("%s is defined %d times: %s", what, len(f), "; ".join(f))
def search_symbols(self, expr):
""" Searches for ABI symbols """
regex = re.compile(expr, re.I)
found_keys = 0
for t in sorted(self.data.items(), key=lambda x: x[0]):
v = t[1]
wtype = v.get("type", "")
if wtype == "File":
continue
for what in v.get("what", [""]):
if regex.search(what):
found_keys += 1
kernelversion = v.get("kernelversion", "").strip(" \t\n")
date = v.get("date", "").strip(" \t\n")
contact = v.get("contact", "").strip(" \t\n")
users = v.get("users", "").strip(" \t\n")
desc = v.get("description", "").strip(" \t\n")
files = []
for f in v.get("file", ()):
files.append(f[0])
what = str(found_keys) + ". " + what
title_tag = "-" * len(what)
print(f"\n{what}\n{title_tag}\n")
if kernelversion:
print(f"Kernel version:\t\t{kernelversion}")
if date:
print(f"Date:\t\t\t{date}")
if contact:
print(f"Contact:\t\t{contact}")
if users:
print(f"Users:\t\t\t{users}")
print("Defined on file(s):\t" + ", ".join(files))
if desc:
desc = desc.strip("\n")
print(f"\n{desc}\n")
if not found_keys:
print(f"Regular expression /{expr}/ not found.")


@@ -0,0 +1,234 @@
#!/usr/bin/env python3
# xxpylint: disable=R0903
# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
# SPDX-License-Identifier: GPL-2.0
"""
Convert ABI what into regular expressions
"""
import re
import sys
from pprint import pformat
from abi_parser import AbiParser
from helpers import AbiDebug
class AbiRegex(AbiParser):
"""Extends AbiParser to search ABI nodes with regular expressions"""
# Escape only ASCII visible characters
escape_symbols = r"([\x21-\x29\x2b-\x2d\x3a-\x40\x5c\x60\x7b-\x7e])"
leave_others = "others"
# Tuples with regular expressions to be compiled and replacement data
re_whats = [
# Drop escape characters that might exist
(re.compile("\\\\"), ""),
# Temporarily escape dot characters
(re.compile(r"\."), "\xf6"),
# Temporarily change [0-9]+ type of patterns
(re.compile(r"\[0\-9\]\+"), "\xff"),
# Temporarily change [\d+-\d+] type of patterns
(re.compile(r"\[0\-\d+\]"), "\xff"),
(re.compile(r"\[0:\d+\]"), "\xff"),
(re.compile(r"\[(\d+)\]"), "\xf4\\\\d+\xf5"),
# Temporarily change [0-9] type of patterns
(re.compile(r"\[(\d)\-(\d)\]"), "\xf4\1-\2\xf5"),
# Handle multiple option patterns
(re.compile(r"[\{\<\[]([\w_]+)(?:[,|]+([\w_]+)){1,}[\}\>\]]"), r"(\1|\2)"),
# Handle wildcards
(re.compile(r"([^\/])\*"), "\\1\\\\w\xf7"),
(re.compile(r"/\*/"), "/.*/"),
(re.compile(r"/\xf6\xf6\xf6"), "/.*"),
(re.compile(r"\<[^\>]+\>"), "\\\\w\xf7"),
(re.compile(r"\{[^\}]+\}"), "\\\\w\xf7"),
(re.compile(r"\[[^\]]+\]"), "\\\\w\xf7"),
(re.compile(r"XX+"), "\\\\w\xf7"),
(re.compile(r"([^A-Z])[XYZ]([^A-Z])"), "\\1\\\\w\xf7\\2"),
(re.compile(r"([^A-Z])[XYZ]$"), "\\1\\\\w\xf7"),
(re.compile(r"_[AB]_"), "_\\\\w\xf7_"),
# Recover [0-9] type of patterns
(re.compile(r"\xf4"), "["),
(re.compile(r"\xf5"), "]"),
# Remove duplicated spaces
(re.compile(r"\s+"), r" "),
# Special case: drop comparison as in:
# What: foo = <something>
# (this happens on a few IIO definitions)
(re.compile(r"\s*\=.*$"), ""),
# Escape all other symbols
(re.compile(escape_symbols), r"\\\1"),
(re.compile(r"\\\\"), r"\\"),
(re.compile(r"\\([\[\]\(\)\|])"), r"\1"),
(re.compile(r"(\d+)\\(-\d+)"), r"\1\2"),
(re.compile(r"\xff"), r"\\d+"),
# Special case: IIO ABI which uses a parenthesis.
(re.compile(r"sqrt(.*)"), r"sqrt(.*)"),
# Simplify regexes with multiple .*
(re.compile(r"(?:\.\*){2,}"), ""),
# Recover dot characters
(re.compile(r"\xf6"), "\\."),
# Recover plus characters
(re.compile(r"\xf7"), "+"),
]
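# Illustrative end-to-end effect of the substitutions above: a What
# expression like
#     /sys/block/<disk>/stat
# ends up, roughly, as the regex
#     /sys/block/\w+/stat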
re_has_num = re.compile(r"\\d")
# Symbol name after escape_chars that are considered a devnode basename
re_symbol_name = re.compile(r"(\w|\\[\.\-\:])+$")
# List of popular group names to be skipped to minimize regex group size
# Use AbiDebug.SUBGROUP_SIZE to detect those
skip_names = set(["devices", "hwmon"])
def regex_append(self, what, new):
"""
Get a search group for a subset of regular expressions.
As the ABI may have thousands of symbols, using a for loop to search
all regular expressions is at least O(n^2). When there are wildcards,
the complexity increases substantially, eventually becoming exponential.
To avoid spending too much time on them, split the expressions into
groups. The smaller the group, the better, as it means that searches
will be confined to a small number of regular expressions.
The conversion to a regex subset is tricky, as we need something
that can be easily obtained from the sysfs symbol and from the
regular expression. So, we need to discard nodes that have
wildcards.
If a subgroup can't be obtained, place the regular expression inside
a special group (self.leave_others).
"""
search_group = None
for search_group in reversed(new.split("/")):
if not search_group or search_group in self.skip_names:
continue
if self.re_symbol_name.match(search_group):
break
if not search_group:
search_group = self.leave_others
if self.debug & AbiDebug.SUBGROUP_MAP:
self.log.debug("%s: mapped as %s", what, search_group)
try:
if search_group not in self.regex_group:
self.regex_group[search_group] = []
self.regex_group[search_group].append(re.compile(new))
if self.search_string:
if what.find(self.search_string) >= 0:
print(f"What: {what}")
except re.PatternError:
self.log.warning("Ignoring '%s' as it produced an invalid regex:\n"
" '%s'", what, new)
def get_regexes(self, what):
"""
Given an ABI devnode, return a list of all regular expressions that
may match it, based on the sub-groups created by regex_append()
"""
re_list = []
patches = what.split("/")
patches.reverse()
patches.append(self.leave_others)
for search_group in patches:
if search_group in self.regex_group:
re_list += self.regex_group[search_group]
return re_list
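# Example (illustrative): for "/sys/class/leds/red/brightness", the
# lookup above tries the "brightness", "red", "leds", "class" and "sys"
# subgroups, in this order, before falling back to the "others" group.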
def __init__(self, *args, **kwargs):
"""
Override the init method to handle the search_string argument
"""
self.regex_group = None
self.search_string = None
self.re_string = None
if "search_string" in kwargs:
self.search_string = kwargs.get("search_string")
del kwargs["search_string"]
if self.search_string:
try:
self.re_string = re.compile(self.search_string)
except re.PatternError as e:
msg = f"{self.search_string} is not a valid regular expression"
raise ValueError(msg) from e
super().__init__(*args, **kwargs)
def parse_abi(self, *args, **kwargs):
super().parse_abi(*args, **kwargs)
self.regex_group = {}
print("Converting ABI What fields into regexes...", file=sys.stderr)
for t in sorted(self.data.items(), key=lambda x: x[0]):
v = t[1]
if v.get("type") == "File":
continue
v["regex"] = []
for what in v.get("what", []):
if not what.startswith("/sys"):
continue
new = what
for r, s in self.re_whats:
try:
new = r.sub(s, new)
except re.PatternError as e:
# Help debugging troubles with new regexes
raise re.PatternError(f"{e}\nwhile re.sub('{r.pattern}', {s}, str)") from e
v["regex"].append(new)
if self.debug & AbiDebug.REGEX:
self.log.debug("%-90s <== %s", new, what)
# Store regex into a subgroup to speedup searches
self.regex_append(what, new)
if self.debug & AbiDebug.SUBGROUP_DICT:
self.log.debug("%s", pformat(self.regex_group))
if self.debug & AbiDebug.SUBGROUP_SIZE:
biggest_keys = sorted(self.regex_group.keys(),
key=lambda k: len(self.regex_group[k]),
reverse=True)
print("Top regex subgroups:", file=sys.stderr)
for k in biggest_keys[:10]:
print(f"{k} has {len(self.regex_group[k])} elements", file=sys.stderr)


@@ -0,0 +1,38 @@
#!/usr/bin/env python3
# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
# pylint: disable=R0903
# SPDX-License-Identifier: GPL-2.0
"""
Helper classes for ABI parser
"""
ABI_DIR = "Documentation/ABI/"
class AbiDebug:
"""Debug levels"""
WHAT_PARSING = 1
WHAT_OPEN = 2
DUMP_ABI_STRUCTS = 4
UNDEFINED = 8
REGEX = 16
SUBGROUP_MAP = 32
SUBGROUP_DICT = 64
SUBGROUP_SIZE = 128
GRAPH = 256
DEBUG_HELP = """
1 - enable debug parsing logic
2 - enable debug messages on file open
4 - enable debug for ABI parse data
8 - enable extra debug information to identify problems
with ABI symbols found on the local machine that
weren't found in the ABI documentation (used only
by the "undefined" subcommand)
16 - enable debug for what to regex conversion
32 - enable debug for symbol regex subgroups
64 - enable debug for sysfs graph tree variable
"""


@@ -0,0 +1,378 @@
#!/usr/bin/env python3
# pylint: disable=R0902,R0912,R0914,R0915,R1702
# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
# SPDX-License-Identifier: GPL-2.0
"""
Parse ABI documentation and produce results from it.
"""
import os
import re
import sys
from concurrent import futures
from datetime import datetime
from random import shuffle
from helpers import AbiDebug
class SystemSymbols:
"""Stores arguments for the class and initialize class vars"""
def graph_add_file(self, path, link=None):
"""
add a file path to the sysfs graph stored at self.root
"""
if path in self.files:
return
name = ""
ref = self.root
for edge in path.split("/"):
name += edge + "/"
if edge not in ref:
ref[edge] = {"__name": [name.rstrip("/")]}
ref = ref[edge]
if link and link not in ref["__name"]:
ref["__name"].append(link.rstrip("/"))
self.files.add(path)
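# Illustrative result: after graph_add_file("/sys/class/leds"),
# self.root contains something like:
#   {"": {"__name": [""],
#         "sys": {"__name": ["/sys"],
#                 "class": {"__name": ["/sys/class"],
#                           "leds": {"__name": ["/sys/class/leds"]}}}}}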
def print_graph(self, root_prefix="", root=None, level=0):
"""Prints a reference tree graph using UTF-8 characters"""
if not root:
root = self.root
level = 0
# Prevent endless traversal
if level > 5:
return
if level > 0:
prefix = "├──"
last_prefix = "└──"
else:
prefix = ""
last_prefix = ""
items = list(root.items())
names = root.get("__name", [])
for k, edge in items:
if k == "__name":
continue
if not k:
k = "/"
if len(names) > 1:
k += " links: " + ",".join(names[1:])
if edge == items[-1][1]:
print(root_prefix + last_prefix + k)
p = root_prefix
if level > 0:
p += " "
self.print_graph(p, edge, level + 1)
else:
print(root_prefix + prefix + k)
p = root_prefix + ""
self.print_graph(p, edge, level + 1)
def _walk(self, root):
"""
Walk through sysfs to get all devnodes that aren't ignored.
By default, uses /sys as the sysfs mount point. If another
directory is used, it is replaced with /sys in the paths.
"""
with os.scandir(root) as obj:
for entry in obj:
path = os.path.join(root, entry.name)
if self.sysfs:
p = path.replace(self.sysfs, "/sys", count=1)
else:
p = path
if self.re_ignore.search(p):
return
# Handle link first to avoid directory recursion
if entry.is_symlink():
real = os.path.realpath(path)
if not self.sysfs:
self.aliases[path] = real
else:
real = real.replace(self.sysfs, "/sys", count=1)
# Add absfile location to graph if it doesn't exist
if not self.re_ignore.search(real):
# Add link to the graph
self.graph_add_file(real, p)
elif entry.is_file():
self.graph_add_file(p)
elif entry.is_dir():
self._walk(path)
def __init__(self, abi, sysfs="/sys", hints=False):
"""
Initialize internal variables and get a list of all files inside
sysfs that can currently be parsed.
Please notice that there are several entries on sysfs that aren't
documented as ABI. Ignore those.
The real paths will be stored under self.files. Aliases will be
stored separately, under self.aliases.
"""
self.abi = abi
self.log = abi.log
if sysfs != "/sys":
self.sysfs = sysfs.rstrip("/")
else:
self.sysfs = None
self.hints = hints
self.root = {}
self.aliases = {}
self.files = set()
dont_walk = [
# Those require root access and aren't documented at ABI
f"^{sysfs}/kernel/debug",
f"^{sysfs}/kernel/tracing",
f"^{sysfs}/fs/pstore",
f"^{sysfs}/fs/bpf",
f"^{sysfs}/fs/fuse",
# This is not documented at ABI
f"^{sysfs}/module",
f"^{sysfs}/fs/cgroup", # this is big and has zero docs under ABI
f"^{sysfs}/firmware", # documented elsewhere: ACPI, DT bindings
"sections|notes", # aren't actually part of ABI
# kernel-parameters.txt - not easy to parse
"parameters",
]
self.re_ignore = re.compile("|".join(dont_walk))
print(f"Reading {sysfs} directory contents...", file=sys.stderr)
self._walk(sysfs)
def check_file(self, refs, found):
"""Check missing ABI symbols for a given sysfs file"""
res_list = []
try:
for names in refs:
fname = names[0]
res = {
"found": False,
"fname": fname,
"msg": "",
}
res_list.append(res)
re_what = self.abi.get_regexes(fname)
if not re_what:
self.abi.log.warning(f"missing rules for {fname}")
continue
for name in names:
for r in re_what:
if self.abi.debug & AbiDebug.UNDEFINED:
self.log.debug("check if %s matches '%s'", name, r.pattern)
if r.match(name):
res["found"] = True
if found:
res["msg"] += f" {fname}: regex:\n\t"
continue
if self.hints and not res["found"]:
res["msg"] += f" {fname} not found. Tested regexes:\n"
for r in re_what:
res["msg"] += " " + r.pattern + "\n"
except KeyboardInterrupt:
pass
return res_list
def _ref_interactor(self, root):
"""Recursive function to interact over the sysfs tree"""
for k, v in root.items():
if isinstance(v, dict):
yield from self._ref_interactor(v)
if root == self.root or k == "__name":
continue
if self.abi.re_string:
fname = v["__name"][0]
if self.abi.re_string.search(fname):
yield v
else:
yield v
def get_fileref(self, all_refs, chunk_size):
"""Interactor to group refs into chunks"""
n = 0
refs = []
for ref in all_refs:
refs.append(ref)
n += 1
if n >= chunk_size:
yield refs
n = 0
refs = []
yield refs
def check_undefined_symbols(self, max_workers=None, chunk_size=50,
found=None, dry_run=None):
"""Seach ABI for sysfs symbols missing documentation"""
self.abi.parse_abi()
if self.abi.debug & AbiDebug.GRAPH:
self.print_graph()
all_refs = []
for ref in self._ref_interactor(self.root):
all_refs.append(ref["__name"])
if dry_run:
print("Would check", file=sys.stderr)
for ref in all_refs:
print(", ".join(ref))
return
print("Starting to search symbols (it may take several minutes):",
file=sys.stderr)
start = datetime.now()
old_elapsed = None
# Python doesn't support true multithreading due to limitations of its
# global interpreter lock (GIL). While Python 3.13 finally made the GIL
# optional, there are still issues related to it. Also, we want to keep
# backward compatibility with older versions of Python.
#
# So, use multiple processes instead. However, Python is very slow at
# passing data from/to multiple processes. Also, it may consume lots of
# memory if the data to be shared is not small. So, we need to group the
# workload into chunks that are big enough to produce performance gains
# while not being so big that they would cause out-of-memory problems.
num_refs = len(all_refs)
print(f"Number of references to parse: {num_refs}", file=sys.stderr)
if not max_workers:
max_workers = os.cpu_count()
elif max_workers > os.cpu_count():
max_workers = os.cpu_count()
max_workers = max(max_workers, 1)
max_chunk_size = int((num_refs + max_workers - 1) / max_workers)
chunk_size = min(chunk_size, max_chunk_size)
chunk_size = max(1, chunk_size)
if max_workers > 1:
executor = futures.ProcessPoolExecutor
# Place references in a random order. This may help improve
# performance by mixing complex and simple expressions when
# creating chunks
shuffle(all_refs)
else:
# Python has a high overhead with processes. When there's just
# one worker, it is faster not to create a new process.
# Yet, the user still deserves a progress report. So, use a
# Python "thread", which actually runs in a single process, using
# an internal scheduler to switch between tasks. There are no
# performance gains for non-IO tasks, but it can still be quickly
# interrupted from time to time to display progress.
executor = futures.ThreadPoolExecutor
not_found = []
f_list = []
with executor(max_workers=max_workers) as exe:
for refs in self.get_fileref(all_refs, chunk_size):
if refs:
try:
f_list.append(exe.submit(self.check_file, refs, found))
except KeyboardInterrupt:
return
total = len(f_list)
if not total:
if self.abi.re_string:
print(f"No ABI symbol matches {self.abi.search_string}")
else:
self.abi.log.warning("No ABI symbols found")
return
print(f"{len(f_list):6d} jobs queued on {max_workers} workers",
file=sys.stderr)
while f_list:
try:
t = futures.wait(f_list, timeout=1,
return_when=futures.FIRST_COMPLETED)
done = t[0]
for fut in done:
res_list = fut.result()
for res in res_list:
if not res["found"]:
not_found.append(res["fname"])
if res["msg"]:
print(res["msg"])
f_list.remove(fut)
except KeyboardInterrupt:
return
except RuntimeError as e:
self.abi.log.warning(f"Future: {e}")
break
if sys.stderr.isatty():
elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0]
if len(f_list) < total:
elapsed += f" ({total - len(f_list)}/{total} jobs completed). "
if elapsed != old_elapsed:
print(elapsed + "\r", end="", flush=True,
file=sys.stderr)
old_elapsed = elapsed
elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0]
print(elapsed, file=sys.stderr)
for f in sorted(not_found):
print(f"{f} not found.")

tools/lib/python/jobserver.py (new executable file, 149 lines)

@@ -0,0 +1,149 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0+
#
# pylint: disable=C0103,C0209
#
#
"""
Interacts with the POSIX jobserver during Kernel build time.
A "normal" jobserver task, like one initiated by a make subprocess, would:
- open read/write file descriptors to communicate with the job server;
- ask for one slot by calling:
claim = os.read(reader, 1)
- when the job finishes, call:
os.write(writer, b"+") # os.write(writer, claim)
Here, the goal is different: this script aims to get the remaining number
of slots available, using all of them to run a command which handles tasks
in parallel. To do that, it has a loop that ends only after there are no
slots left. It then increments the number by one, in order to allow a
call equivalent to make -j$((claim+1)), e.g. having a parent make create
$claim children to do the actual work.
The end goal here is to keep the total number of build tasks under the
limit established by the initial make -j$n_proc call.
See:
https://www.gnu.org/software/make/manual/html_node/POSIX-Jobserver.html#POSIX-Jobserver
"""
import errno
import os
import subprocess
import sys
class JobserverExec:
"""
Claim all slots from make using POSIX Jobserver.
The main methods here are:
- open(): reserves all slots;
- close(): returns all used slots back to make;
- run(): executes a command with PARALLELISM set to <available job slots + 1>
"""
def __init__(self):
"""Initialize internal vars"""
self.claim = 0
self.jobs = b""
self.reader = None
self.writer = None
self.is_open = False
def open(self):
"""Reserve all available slots to be claimed later on"""
if self.is_open:
return
try:
# Fetch the make environment options.
flags = os.environ["MAKEFLAGS"]
# Look for "--jobserver=R,W"
# Note that GNU Make has used --jobserver-fds and --jobserver-auth
# so this handles all of them.
opts = [x for x in flags.split(" ") if x.startswith("--jobserver")]
# Parse out R,W file descriptor numbers and set them nonblocking.
# If the MAKEFLAGS variable contains multiple instances of the
# --jobserver-auth= option, the last one is relevant.
fds = opts[-1].split("=", 1)[1]
# Starting with GNU Make 4.4, named pipes are used for reader
# and writer.
# Example argument: --jobserver-auth=fifo:/tmp/GMfifo8134
_, _, path = fds.partition("fifo:")
if path:
self.reader = os.open(path, os.O_RDONLY | os.O_NONBLOCK)
self.writer = os.open(path, os.O_WRONLY)
else:
self.reader, self.writer = [int(x) for x in fds.split(",", 1)]
# Open a private copy of reader to avoid setting nonblocking
# on an unsuspecting process with the same reader fd.
self.reader = os.open("/proc/self/fd/%d" % (self.reader),
os.O_RDONLY | os.O_NONBLOCK)
# Read out as many jobserver slots as possible
while True:
try:
slot = os.read(self.reader, 8)
self.jobs += slot
except (OSError, IOError) as e:
if e.errno == errno.EWOULDBLOCK:
# Stop at the end of the jobserver queue.
break
# If something went wrong, give back the jobs.
if self.jobs:
os.write(self.writer, self.jobs)
raise e
# Add a bump for our caller's reservation, since we're just going
# to sit here blocked on our child.
self.claim = len(self.jobs) + 1
except (KeyError, IndexError, ValueError, OSError, IOError):
# Any missing environment strings or bad fds should result in just
# not being parallel.
self.claim = None
self.is_open = True
def close(self):
"""Return all reserved slots to Jobserver"""
if not self.is_open:
return
# Return all the reserved slots.
if len(self.jobs):
os.write(self.writer, self.jobs)
self.is_open = False
def __enter__(self):
self.open()
return self
def __exit__(self, exc_type, exc_value, exc_traceback):
self.close()
def run(self, cmd, *args, **pwargs):
"""
Run a command setting PARALLELISM env variable to the number of
available job slots (claim) + 1, e.g. it will reserve claim slots
to do the actual build work, plus one to monitor its children.
"""
self.open() # Ensure that self.claim is set
# We can only claim parallelism if there was a jobserver (i.e. a
# top-level "-jN" argument) and there were no other failures. Otherwise
# leave out the environment variable and let the child figure out what
# is best.
if self.claim:
os.environ["PARALLELISM"] = str(self.claim)
return subprocess.call(cmd, *args, **pwargs)
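Typical usage, as a context manager (an illustrative sketch; the command is hypothetical and is expected to honor the PARALLELISM environment variable):

from jobserver import JobserverExec

with JobserverExec() as jobserver:
    # The child sees PARALLELISM=<claimed slots + 1>; slots are
    # returned to make when the "with" block exits.
    ret = jobserver.run(["./build-docs.sh"])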


@@ -0,0 +1,70 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2025 by Mauro Carvalho Chehab <mchehab@kernel.org>.
"""
Ancillary argparse HelpFormatter class that works in a similar way to
argparse.RawDescriptionHelpFormatter, e.g. the description maintains
line breaks, but it also implements transformations of the help text.
The actual transformations are given by enrich_text(), if the output is
a tty. Currently, the following transformations are done:
- Positional arguments are shown in upper case;
- if the output is a TTY, ``var`` and positional arguments are shown
prepended by an ANSI SGR code. This is usually rendered as bold. On
some terminals, like konsole, it is rendered as colored bold text.
"""
import argparse
import re
import sys
class EnrichFormatter(argparse.HelpFormatter):
"""
Better format the output, making it easier to identify the positional
args and how they're used in the __doc__ description.
"""
def __init__(self, *args, **kwargs):
"""Initialize class and check if is TTY"""
super().__init__(*args, **kwargs)
self._tty = sys.stdout.isatty()
def enrich_text(self, text):
"""Handle ReST markups (currently, only ``foo``)"""
if self._tty and text:
# Replace ``text`` with ANSI SGR (bold)
return re.sub(r'\`\`(.+?)\`\`',
lambda m: f'\033[1m{m.group(1)}\033[0m', text)
return text
def _fill_text(self, text, width, indent):
"""Enrich descriptions with markups on it"""
enriched = self.enrich_text(text)
return "\n".join(indent + line for line in enriched.splitlines())
def _format_usage(self, usage, actions, groups, prefix):
"""Enrich positional arguments at usage: line"""
prog = self._prog
parts = []
for action in actions:
if action.option_strings:
opt = action.option_strings[0]
if action.nargs != 0:
opt += f" {action.dest.upper()}"
parts.append(f"[{opt}]")
else:
# Positional argument
parts.append(self.enrich_text(f"``{action.dest.upper()}``"))
usage_text = f"{prefix or 'usage: '} {prog} {' '.join(parts)}\n"
return usage_text
def _format_action_invocation(self, action):
"""Enrich argument names"""
if not action.option_strings:
return self.enrich_text(f"``{action.dest.upper()}``")
return ", ".join(action.option_strings)


@@ -0,0 +1,294 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
#
# pylint: disable=R0903,R0913,R0914,R0917
"""
Parse kernel-doc tags on multiple kernel source files.
"""
import argparse
import logging
import os
import re
from kdoc_parser import KernelDoc
from kdoc_output import OutputFormat
class GlobSourceFiles:
"""
Parse C source code file names and directories via an iterator.
"""
def __init__(self, srctree=None, valid_extensions=None):
"""
Initialize valid extensions with a tuple.
If not defined, assume default C extensions (.c and .h)
It would be possible to use python's glob function, but it is
very slow, and it is not an iterator: it would read all
directories before actually doing anything.
So, let's use our own implementation.
"""
if not valid_extensions:
self.extensions = (".c", ".h")
else:
self.extensions = valid_extensions
self.srctree = srctree
def _parse_dir(self, dirname):
"""Internal function to parse files recursively"""
with os.scandir(dirname) as obj:
for entry in obj:
name = os.path.join(dirname, entry.name)
if entry.is_dir(follow_symlinks=False):
yield from self._parse_dir(name)
if not entry.is_file():
continue
basename = os.path.basename(name)
if not basename.endswith(self.extensions):
continue
yield name
def parse_files(self, file_list, file_not_found_cb):
"""
Define an iterator to parse all source files from file_list,
handling directories if any
"""
if not file_list:
return
for fname in file_list:
if self.srctree:
f = os.path.join(self.srctree, fname)
else:
f = fname
if os.path.isdir(f):
yield from self._parse_dir(f)
elif os.path.isfile(f):
yield f
elif file_not_found_cb:
file_not_found_cb(fname)
class KernelFiles():
"""
Parse kernel-doc tags on multiple kernel source files.
There are two types of parsers defined here:
- self.parse_file(): parses both kernel-doc markups and
EXPORT_SYMBOL* macros;
- self.process_export_file(): parses only EXPORT_SYMBOL* macros.
"""
def warning(self, msg):
"""Ancillary routine to output a warning and increment error count"""
self.config.log.warning(msg)
self.errors += 1
def error(self, msg):
"""Ancillary routine to output an error and increment error count"""
self.config.log.error(msg)
self.errors += 1
def parse_file(self, fname):
"""
Parse a single Kernel source.
"""
# Prevent parsing the same file twice if results are cached
if fname in self.files:
return
doc = KernelDoc(self.config, fname)
export_table, entries = doc.parse_kdoc()
self.export_table[fname] = export_table
self.files.add(fname)
self.export_files.add(fname) # parse_kdoc() already checks exports
self.results[fname] = entries
def process_export_file(self, fname):
"""
Parses EXPORT_SYMBOL* macros from a single Kernel source file.
"""
# Prevent parsing the same file twice if results are cached
if fname in self.export_files:
return
doc = KernelDoc(self.config, fname)
export_table = doc.parse_export()
if not export_table:
self.error(f"Error: Cannot check EXPORT_SYMBOL* on {fname}")
export_table = set()
self.export_table[fname] = export_table
self.export_files.add(fname)
def file_not_found_cb(self, fname):
"""
Callback to warn if a file was not found.
"""
self.error(f"Cannot find file {fname}")
def __init__(self, verbose=False, out_style=None,
werror=False, wreturn=False, wshort_desc=False,
wcontents_before_sections=False,
logger=None):
"""
Initialize startup variables and parse all files
"""
if not verbose:
verbose = bool(os.environ.get("KBUILD_VERBOSE", 0))
if out_style is None:
out_style = OutputFormat()
if not werror:
kcflags = os.environ.get("KCFLAGS", None)
if kcflags:
match = re.search(r"(\s|^)-Werror(\s|$)/", kcflags)
if match:
werror = True
# Reading this variable is for backward compatibility, just in
# case someone is calling the script with the variable set from
# outside the kernel's build system
kdoc_werror = os.environ.get("KDOC_WERROR", None)
if kdoc_werror:
werror = kdoc_werror
# Some variables are global to the parser logic as a whole as they are
# used to send control configuration to KernelDoc class. As such,
# those variables are read-only inside the KernelDoc.
self.config = argparse.Namespace()
self.config.verbose = verbose
self.config.werror = werror
self.config.wreturn = wreturn
self.config.wshort_desc = wshort_desc
self.config.wcontents_before_sections = wcontents_before_sections
if not logger:
self.config.log = logging.getLogger("kernel-doc")
else:
self.config.log = logger
self.config.warning = self.warning
self.config.src_tree = os.environ.get("SRCTREE", None)
# Initialize variables that are internal to KernelFiles
self.out_style = out_style
self.errors = 0
self.results = {}
self.files = set()
self.export_files = set()
self.export_table = {}
def parse(self, file_list, export_file=None):
"""
Parse all files
"""
glob = GlobSourceFiles(srctree=self.config.src_tree)
for fname in glob.parse_files(file_list, self.file_not_found_cb):
self.parse_file(fname)
for fname in glob.parse_files(export_file, self.file_not_found_cb):
self.process_export_file(fname)
def out_msg(self, fname, name, arg):
"""
Return output messages from a file name using the output style
filtering.
If the output type was not handled by the style class, return None.
"""
# NOTE: we can add rules here to filter out unwanted parts,
# although OutputFormat.msg already does that.
return self.out_style.msg(fname, name, arg)
def msg(self, enable_lineno=False, export=False, internal=False,
symbol=None, nosymbol=None, no_doc_sections=False,
filenames=None, export_file=None):
"""
Iterate over the kernel-doc results and output messages,
returning kernel-doc markups on each iteration
"""
self.out_style.set_config(self.config)
if not filenames:
filenames = sorted(self.results.keys())
glob = GlobSourceFiles(srctree=self.config.src_tree)
for fname in filenames:
function_table = set()
if internal or export:
if not export_file:
export_file = [fname]
for f in glob.parse_files(export_file, self.file_not_found_cb):
function_table |= self.export_table[f]
if symbol:
for s in symbol:
function_table.add(s)
self.out_style.set_filter(export, internal, symbol, nosymbol,
function_table, enable_lineno,
no_doc_sections)
msg = ""
if fname not in self.results:
self.config.log.warning("No kernel-doc for file %s", fname)
continue
symbols = self.results[fname]
self.out_style.set_symbols(symbols)
for arg in symbols:
m = self.out_msg(fname, arg.name, arg)
if m is None:
ln = arg.get("ln", 0)
dtype = arg.get('type', "")
self.config.log.warning("%s:%d Can't handle %s",
fname, ln, dtype)
else:
msg += m
if msg:
yield fname, msg
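A minimal usage sketch (hypothetical; it assumes the modules keep the kdoc_files/kdoc_output names used in the imports above):

from kdoc_files import KernelFiles
from kdoc_output import RestFormat

kfiles = KernelFiles(out_style=RestFormat())
kfiles.parse(["drivers/base/core.c"])   # any C file or directory
for fname, out in kfiles.msg(enable_lineno=True):
    print(out)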


@@ -0,0 +1,43 @@
# SPDX-License-Identifier: GPL-2.0
#
# A class that will, eventually, encapsulate all of the parsed data that we
# then pass into the output modules.
#
class KdocItem:
def __init__(self, name, fname, type, start_line, **other_stuff):
self.name = name
self.fname = fname
self.type = type
self.declaration_start_line = start_line
self.sections = {}
self.section_start_lines = {}
self.parameterlist = []
self.parameterdesc_start_lines = []
self.parameterdescs = {}
self.parametertypes = {}
#
# Just save everything else into our own dict so that the output
# side can grab it directly as before. As we move things into more
# structured data, this will, hopefully, fade away.
#
self.other_stuff = other_stuff
def get(self, key, default=None):
return self.other_stuff.get(key, default)
def __getitem__(self, key):
return self.get(key)
#
# Tracking of section and parameter information.
#
def set_sections(self, sections, start_lines):
self.sections = sections
self.section_start_lines = start_lines
def set_params(self, names, descs, types, starts):
self.parameterlist = names
self.parameterdescs = descs
self.parametertypes = types
self.parameterdesc_start_lines = starts
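# Illustrative access pattern: structured fields are attributes
# (item.name, item.sections, item.parameterlist), while anything left
# in other_stuff remains reachable dict-style via item["key"] or
# item.get("key", default).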


@@ -0,0 +1,824 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
#
# pylint: disable=C0301,R0902,R0911,R0912,R0913,R0914,R0915,R0917
"""
Implement output filters to print kernel-doc documentation.
The implementation uses a virtual base class (OutputFormat), which
contains the dispatchers to the virtual methods and some code to
filter out output messages.
The actual implementation is done in one separate class per type
of output. Currently, there are output classes for ReST and man/troff.
"""
import os
import re
from datetime import datetime
from kdoc_parser import KernelDoc, type_param
from kdoc_re import KernRe
function_pointer = KernRe(r"([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)", cache=False)
# match expressions used to find embedded type information
type_constant = KernRe(r"\b``([^\`]+)``\b", cache=False)
type_constant2 = KernRe(r"\%([-_*\w]+)", cache=False)
type_func = KernRe(r"(\w+)\(\)", cache=False)
type_param_ref = KernRe(r"([\!~\*]?)\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)
# Special RST handling for func ptr params
type_fp_param = KernRe(r"\@(\w+)\(\)", cache=False)
# Special RST handling for structs with func ptr params
type_fp_param2 = KernRe(r"\@(\w+->\S+)\(\)", cache=False)
type_env = KernRe(r"(\$\w+)", cache=False)
type_enum = KernRe(r"\&(enum\s*([_\w]+))", cache=False)
type_struct = KernRe(r"\&(struct\s*([_\w]+))", cache=False)
type_typedef = KernRe(r"\&(typedef\s*([_\w]+))", cache=False)
type_union = KernRe(r"\&(union\s*([_\w]+))", cache=False)
type_member = KernRe(r"\&([_\w]+)(\.|->)([_\w]+)", cache=False)
type_fallback = KernRe(r"\&([_\w]+)", cache=False)
type_member_func = type_member + KernRe(r"\(\)", cache=False)
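# Illustrative examples of what the expressions above match:
#   %EINVAL          -> constant (type_constant2)
#   some_function()  -> function reference (type_func)
#   @arg             -> parameter reference (type_param_ref)
#   &struct device   -> struct cross-reference (type_struct)
#   $ENVVAR          -> environment variable (type_env)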
class OutputFormat:
"""
Base class for OutputFormat. If used as-is, it means that only
warnings will be displayed.
"""
# output mode.
OUTPUT_ALL = 0 # output all symbols and doc sections
OUTPUT_INCLUDE = 1 # output only specified symbols
OUTPUT_EXPORTED = 2 # output exported symbols
OUTPUT_INTERNAL = 3 # output non-exported symbols
# Virtual member to be overridden by the inherited classes
highlights = []
def __init__(self):
"""Declare internal vars and set mode to OUTPUT_ALL"""
self.out_mode = self.OUTPUT_ALL
self.enable_lineno = None
self.nosymbol = {}
self.symbol = None
self.function_table = None
self.config = None
self.no_doc_sections = False
self.data = ""
def set_config(self, config):
"""
Setup global config variables used by both parser and output.
"""
self.config = config
def set_filter(self, export, internal, symbol, nosymbol, function_table,
enable_lineno, no_doc_sections):
"""
Initialize filter variables according to the requested mode.
Only one choice is valid among export, internal and symbol.
The nosymbol filter can be used in all modes.
"""
self.enable_lineno = enable_lineno
self.no_doc_sections = no_doc_sections
self.function_table = function_table
if symbol:
self.out_mode = self.OUTPUT_INCLUDE
elif export:
self.out_mode = self.OUTPUT_EXPORTED
elif internal:
self.out_mode = self.OUTPUT_INTERNAL
else:
self.out_mode = self.OUTPUT_ALL
if nosymbol:
self.nosymbol = set(nosymbol)
def highlight_block(self, block):
"""
Apply the RST highlights to a sub-block of text.
"""
for r, sub in self.highlights:
block = r.sub(sub, block)
return block
def out_warnings(self, args):
"""
Output warnings for identifiers that will be displayed.
"""
for log_msg in args.warnings:
self.config.warning(log_msg)
def check_doc(self, name, args):
"""Check if DOC should be output"""
if self.no_doc_sections:
return False
if name in self.nosymbol:
return False
if self.out_mode == self.OUTPUT_ALL:
self.out_warnings(args)
return True
if self.out_mode == self.OUTPUT_INCLUDE:
if name in self.function_table:
self.out_warnings(args)
return True
return False
def check_declaration(self, dtype, name, args):
"""
Checks if a declaration should be output or not based on the
filtering criteria.
"""
if name in self.nosymbol:
return False
if self.out_mode == self.OUTPUT_ALL:
self.out_warnings(args)
return True
if self.out_mode in [self.OUTPUT_INCLUDE, self.OUTPUT_EXPORTED]:
if name in self.function_table:
return True
if self.out_mode == self.OUTPUT_INTERNAL:
if dtype != "function":
self.out_warnings(args)
return True
if name not in self.function_table:
self.out_warnings(args)
return True
return False
def msg(self, fname, name, args):
"""
Handles a single entry from kernel-doc parser
"""
self.data = ""
dtype = args.type
if dtype == "doc":
self.out_doc(fname, name, args)
return self.data
if not self.check_declaration(dtype, name, args):
return self.data
if dtype == "function":
self.out_function(fname, name, args)
return self.data
if dtype == "enum":
self.out_enum(fname, name, args)
return self.data
if dtype == "typedef":
self.out_typedef(fname, name, args)
return self.data
if dtype in ["struct", "union"]:
self.out_struct(fname, name, args)
return self.data
# Warn if some type requires an output logic
self.config.log.warning("doesn't now how to output '%s' block",
dtype)
return None
# Virtual methods to be overridden by inherited classes
# At the base class, those do nothing.
def set_symbols(self, symbols):
"""Get a list of all symbols from kernel_doc"""
def out_doc(self, fname, name, args):
"""Outputs a DOC block"""
def out_function(self, fname, name, args):
"""Outputs a function"""
def out_enum(self, fname, name, args):
"""Outputs an enum"""
def out_typedef(self, fname, name, args):
"""Outputs a typedef"""
def out_struct(self, fname, name, args):
"""Outputs a struct"""
class RestFormat(OutputFormat):
"""Consts and functions used by ReST output"""
highlights = [
(type_constant, r"``\1``"),
(type_constant2, r"``\1``"),
# Note: need to escape () to avoid func matching later
(type_member_func, r":c:type:`\1\2\3\\(\\) <\1>`"),
(type_member, r":c:type:`\1\2\3 <\1>`"),
(type_fp_param, r"**\1\\(\\)**"),
(type_fp_param2, r"**\1\\(\\)**"),
(type_func, r"\1()"),
(type_enum, r":c:type:`\1 <\2>`"),
(type_struct, r":c:type:`\1 <\2>`"),
(type_typedef, r":c:type:`\1 <\2>`"),
(type_union, r":c:type:`\1 <\2>`"),
# in rst this can refer to any type
(type_fallback, r":c:type:`\1`"),
(type_param_ref, r"**\1\2**")
]
blankline = "\n"
sphinx_literal = KernRe(r'^[^.].*::$', cache=False)
sphinx_cblock = KernRe(r'^\.\.\ +code-block::', cache=False)
def __init__(self):
"""
Creates class variables.
Not really mandatory, but it is a good coding style and makes
pylint happy.
"""
super().__init__()
self.lineprefix = ""
def print_lineno(self, ln):
"""Outputs a line number"""
if self.enable_lineno and ln is not None:
ln += 1
self.data += f".. LINENO {ln}\n"
def output_highlight(self, args):
"""
Outputs a C symbol that may require being converted to ReST using
the self.highlights variable
"""
input_text = args
output = ""
in_literal = False
litprefix = ""
block = ""
for line in input_text.strip("\n").split("\n"):
# If we're in a literal block, see if we should drop out of it.
# Otherwise, pass the line straight through unmunged.
if in_literal:
if line.strip(): # If the line is not blank
# If this is the first non-blank line in a literal block,
# figure out the proper indent.
if not litprefix:
r = KernRe(r'^(\s*)')
if r.match(line):
litprefix = '^' + r.group(1)
else:
litprefix = ""
output += line + "\n"
elif not KernRe(litprefix).match(line):
in_literal = False
else:
output += line + "\n"
else:
output += line + "\n"
# Not in a literal block (or just dropped out)
if not in_literal:
block += line + "\n"
if self.sphinx_literal.match(line) or self.sphinx_cblock.match(line):
in_literal = True
litprefix = ""
output += self.highlight_block(block)
block = ""
# Handle any remaining block
if block:
output += self.highlight_block(block)
# Print the output with the line prefix
for line in output.strip("\n").split("\n"):
self.data += self.lineprefix + line + "\n"
def out_section(self, args, out_docblock=False):
"""
Outputs a block section.
This could use some work; it's used to output the DOC: sections, and
starts by putting out the name of the doc section itself, but that
tends to duplicate a header already in the template file.
"""
for section, text in args.sections.items():
# Skip sections that are in the nosymbol_table
if section in self.nosymbol:
continue
if out_docblock:
if self.out_mode != self.OUTPUT_INCLUDE:
self.data += f".. _{section}:\n\n"
self.data += f'{self.lineprefix}**{section}**\n\n'
else:
self.data += f'{self.lineprefix}**{section}**\n\n'
self.print_lineno(args.section_start_lines.get(section, 0))
self.output_highlight(text)
self.data += "\n"
self.data += "\n"
def out_doc(self, fname, name, args):
if not self.check_doc(name, args):
return
self.out_section(args, out_docblock=True)
def out_function(self, fname, name, args):
oldprefix = self.lineprefix
signature = ""
func_macro = args.get('func_macro', False)
if func_macro:
signature = name
else:
if args.get('functiontype'):
signature = args['functiontype'] + " "
signature += name + " ("
ln = args.declaration_start_line
count = 0
for parameter in args.parameterlist:
if count != 0:
signature += ", "
count += 1
dtype = args.parametertypes.get(parameter, "")
if function_pointer.search(dtype):
# Reconstruct pointer-to-function parameters as "type (*name) (args)";
# the function_pointer regex above only has two groups
signature += function_pointer.group(1) + parameter + ") (" + function_pointer.group(2) + ")"
else:
signature += dtype
if not func_macro:
signature += ")"
self.print_lineno(ln)
if args.get('typedef') or not args.get('functiontype'):
self.data += f".. c:macro:: {name}\n\n"
if args.get('typedef'):
self.data += " **Typedef**: "
self.lineprefix = ""
self.output_highlight(args.get('purpose', ""))
self.data += "\n\n**Syntax**\n\n"
self.data += f" ``{signature}``\n\n"
else:
self.data += f"``{signature}``\n\n"
else:
self.data += f".. c:function:: {signature}\n\n"
if not args.get('typedef'):
self.print_lineno(ln)
self.lineprefix = " "
self.output_highlight(args.get('purpose', ""))
self.data += "\n"
# Put descriptive text into a container (HTML <div>) to help set
# function prototypes apart
self.lineprefix = " "
if args.parameterlist:
self.data += ".. container:: kernelindent\n\n"
self.data += f"{self.lineprefix}**Parameters**\n\n"
for parameter in args.parameterlist:
parameter_name = KernRe(r'\[.*').sub('', parameter)
dtype = args.parametertypes.get(parameter, "")
if dtype:
self.data += f"{self.lineprefix}``{dtype}``\n"
else:
self.data += f"{self.lineprefix}``{parameter}``\n"
self.print_lineno(args.parameterdesc_start_lines.get(parameter_name, 0))
self.lineprefix = " "
if parameter_name in args.parameterdescs and \
args.parameterdescs[parameter_name] != KernelDoc.undescribed:
self.output_highlight(args.parameterdescs[parameter_name])
self.data += "\n"
else:
self.data += f"{self.lineprefix}*undescribed*\n\n"
self.lineprefix = " "
self.out_section(args)
self.lineprefix = oldprefix
def out_enum(self, fname, name, args):
oldprefix = self.lineprefix
ln = args.declaration_start_line
self.data += f"\n\n.. c:enum:: {name}\n\n"
self.print_lineno(ln)
self.lineprefix = " "
self.output_highlight(args.get('purpose', ''))
self.data += "\n"
self.data += ".. container:: kernelindent\n\n"
outer = self.lineprefix + " "
self.lineprefix = outer + " "
self.data += f"{outer}**Constants**\n\n"
for parameter in args.parameterlist:
self.data += f"{outer}``{parameter}``\n"
if args.parameterdescs.get(parameter, '') != KernelDoc.undescribed:
self.output_highlight(args.parameterdescs[parameter])
else:
self.data += f"{self.lineprefix}*undescribed*\n\n"
self.data += "\n"
self.lineprefix = oldprefix
self.out_section(args)
def out_typedef(self, fname, name, args):
oldprefix = self.lineprefix
ln = args.declaration_start_line
self.data += f"\n\n.. c:type:: {name}\n\n"
self.print_lineno(ln)
self.lineprefix = " "
self.output_highlight(args.get('purpose', ''))
self.data += "\n"
self.lineprefix = oldprefix
self.out_section(args)
def out_struct(self, fname, name, args):
purpose = args.get('purpose', "")
declaration = args.get('definition', "")
dtype = args.type
ln = args.declaration_start_line
self.data += f"\n\n.. c:{dtype}:: {name}\n\n"
self.print_lineno(ln)
oldprefix = self.lineprefix
self.lineprefix += " "
self.output_highlight(purpose)
self.data += "\n"
self.data += ".. container:: kernelindent\n\n"
self.data += f"{self.lineprefix}**Definition**::\n\n"
self.lineprefix = self.lineprefix + " "
declaration = declaration.replace("\t", self.lineprefix)
self.data += f"{self.lineprefix}{dtype} {name}" + ' {' + "\n"
self.data += f"{declaration}{self.lineprefix}" + "};\n\n"
self.lineprefix = " "
self.data += f"{self.lineprefix}**Members**\n\n"
for parameter in args.parameterlist:
if not parameter or parameter.startswith("#"):
continue
parameter_name = parameter.split("[", maxsplit=1)[0]
if args.parameterdescs.get(parameter_name) == KernelDoc.undescribed:
continue
self.print_lineno(args.parameterdesc_start_lines.get(parameter_name, 0))
self.data += f"{self.lineprefix}``{parameter}``\n"
self.lineprefix = " "
self.output_highlight(args.parameterdescs[parameter_name])
self.lineprefix = " "
self.data += "\n"
self.data += "\n"
self.lineprefix = oldprefix
self.out_section(args)
class ManFormat(OutputFormat):
"""Consts and functions used by man pages output"""
highlights = (
(type_constant, r"\1"),
(type_constant2, r"\1"),
(type_func, r"\\fB\1\\fP"),
(type_enum, r"\\fI\1\\fP"),
(type_struct, r"\\fI\1\\fP"),
(type_typedef, r"\\fI\1\\fP"),
(type_union, r"\\fI\1\\fP"),
(type_param, r"\\fI\1\\fP"),
(type_param_ref, r"\\fI\1\2\\fP"),
(type_member, r"\\fI\1\2\3\\fP"),
(type_fallback, r"\\fI\1\\fP")
)
blankline = ""
date_formats = [
"%a %b %d %H:%M:%S %Z %Y",
"%a %b %d %H:%M:%S %Y",
"%Y-%m-%d",
"%b %d %Y",
"%B %d %Y",
"%m %d %Y",
]
def __init__(self, modulename):
"""
Creates class variables.
Not really mandatory, but it is a good coding style and makes
pylint happy.
"""
super().__init__()
self.modulename = modulename
self.symbols = []
dt = None
tstamp = os.environ.get("KBUILD_BUILD_TIMESTAMP")
if tstamp:
for fmt in self.date_formats:
try:
dt = datetime.strptime(tstamp, fmt)
break
except ValueError:
pass
if not dt:
dt = datetime.now()
self.man_date = dt.strftime("%B %Y")
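# Illustrative example (an assumption, not taken from any build system):
# KBUILD_BUILD_TIMESTAMP="2025-01-15" matches the "%Y-%m-%d" format above
# and yields self.man_date == "January 2025".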
def arg_name(self, args, name):
"""
Return the name that will be used for the man page.
As we may have the same name in different namespaces,
prepend the data type for all types except functions and typedefs.
The doc section is special: it uses the modulename.
"""
dtype = args.type
if dtype == "doc":
return self.modulename
if dtype in ["function", "typedef"]:
return name
return f"{dtype} {name}"
def set_symbols(self, symbols):
"""
Store a list of all symbols from kernel_doc.
Man pages use it to add a SEE ALSO section with the other
symbols from the same file.
"""
self.symbols = symbols
def out_tail(self, fname, name, args):
"""Adds a tail for all man pages"""
# SEE ALSO section
self.data += '.SH "SEE ALSO"' + "\n.PP\n"
self.data += (f"Kernel file \\fB{args.fname}\\fR\n")
if len(self.symbols) >= 2:
cur_name = self.arg_name(args, name)
related = []
for arg in self.symbols:
out_name = self.arg_name(arg, arg.name)
if cur_name == out_name:
continue
related.append(f"\\fB{out_name}\\fR(9)")
self.data += ",\n".join(related) + "\n"
# TODO: does it make sense to add other sections? Maybe
# REPORTING ISSUES? LICENSE?
def msg(self, fname, name, args):
"""
Handles a single entry from kernel-doc parser.
Add a tail at the end of man pages output.
"""
super().msg(fname, name, args)
self.out_tail(fname, name, args)
return self.data
def output_highlight(self, block):
"""
Output a text block, applying the patterns from the
self.highlights variable using troff syntax
"""
contents = self.highlight_block(block)
if isinstance(contents, list):
contents = "\n".join(contents)
for line in contents.strip("\n").split("\n"):
line = KernRe(r"^\s*").sub("", line)
if not line:
continue
if line[0] == ".":
self.data += "\\&" + line + "\n"
else:
self.data += line + "\n"
def out_doc(self, fname, name, args):
if not self.check_doc(name, args):
return
out_name = self.arg_name(args, name)
self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n"
for section, text in args.sections.items():
self.data += f'.SH "{section}"' + "\n"
self.output_highlight(text)
def out_function(self, fname, name, args):
"""output function in man"""
out_name = self.arg_name(args, name)
self.data += f'.TH "{name}" 9 "{out_name}" "{self.man_date}" "Kernel Hacker\'s Manual" LINUX' + "\n"
self.data += ".SH NAME\n"
self.data += f"{name} \\- {args['purpose']}\n"
self.data += ".SH SYNOPSIS\n"
if args.get('functiontype', ''):
self.data += f'.B "{args["functiontype"]}" {name}' + "\n"
else:
self.data += f'.B "{name}' + "\n"
count = 0
parenth = "("
post = ","
for parameter in args.parameterlist:
if count == len(args.parameterlist) - 1:
post = ");"
dtype = args.parametertypes.get(parameter, "")
if function_pointer.match(dtype):
# Pointer-to-function
self.data += f'".BI "{parenth}{function_pointer.group(1)}" " ") ({function_pointer.group(2)}){post}"' + "\n"
else:
dtype = KernRe(r'([^\*])$').sub(r'\1 ', dtype)
self.data += f'.BI "{parenth}{dtype}" "{post}"' + "\n"
count += 1
parenth = ""
if args.parameterlist:
self.data += ".SH ARGUMENTS\n"
for parameter in args.parameterlist:
parameter_name = re.sub(r'\[.*', '', parameter)
self.data += f'.IP "{parameter}" 12' + "\n"
self.output_highlight(args.parameterdescs.get(parameter_name, ""))
for section, text in args.sections.items():
self.data += f'.SH "{section.upper()}"' + "\n"
self.output_highlight(text)
def out_enum(self, fname, name, args):
out_name = self.arg_name(args, name)
self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n"
self.data += ".SH NAME\n"
self.data += f"enum {name} \\- {args['purpose']}\n"
self.data += ".SH SYNOPSIS\n"
self.data += f"enum {name}" + " {\n"
count = 0
for parameter in args.parameterlist:
self.data += f'.br\n.BI " {parameter}"' + "\n"
if count == len(args.parameterlist) - 1:
self.data += "\n};\n"
else:
self.data += ", \n.br\n"
count += 1
self.data += ".SH Constants\n"
for parameter in args.parameterlist:
parameter_name = KernRe(r'\[.*').sub('', parameter)
self.data += f'.IP "{parameter}" 12' + "\n"
self.output_highlight(args.parameterdescs.get(parameter_name, ""))
for section, text in args.sections.items():
self.data += f'.SH "{section}"' + "\n"
self.output_highlight(text)
def out_typedef(self, fname, name, args):
module = self.modulename
purpose = args.get('purpose')
out_name = self.arg_name(args, name)
self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n"
self.data += ".SH NAME\n"
self.data += f"typedef {name} \\- {purpose}\n"
for section, text in args.sections.items():
self.data += f'.SH "{section}"' + "\n"
self.output_highlight(text)
def out_struct(self, fname, name, args):
module = self.modulename
purpose = args.get('purpose')
definition = args.get('definition')
out_name = self.arg_name(args, name)
self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n"
self.data += ".SH NAME\n"
self.data += f"{args.type} {name} \\- {purpose}\n"
# Replace tabs with two spaces and handle newlines
declaration = definition.replace("\t", " ")
declaration = KernRe(r"\n").sub('"\n.br\n.BI "', declaration)
self.data += ".SH SYNOPSIS\n"
self.data += f"{args.type} {name} " + "{" + "\n.br\n"
self.data += f'.BI "{declaration}\n' + "};\n.br\n\n"
self.data += ".SH Members\n"
for parameter in args.parameterlist:
if parameter.startswith("#"):
continue
parameter_name = re.sub(r"\[.*", "", parameter)
if args.parameterdescs.get(parameter_name) == KernelDoc.undescribed:
continue
self.data += f'.IP "{parameter}" 12' + "\n"
self.output_highlight(args.parameterdescs.get(parameter_name))
for section, text in args.sections.items():
self.data += f'.SH "{section}"' + "\n"
self.output_highlight(text)

File diff suppressed because it is too large


@@ -0,0 +1,270 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
"""
Regular expression ancillary classes.
They help cache compiled regular expressions and do the matching for kernel-doc.
"""
import re
# Local cache for regular expressions
re_cache = {}
class KernRe:
"""
Helper class to simplify regex declaration and usage.
It calls re.compile for a given pattern and allows two expressions
to be concatenated with the "+" operator at init time.
Compiled regular expressions can be cached via an argument, helping
to speed up searches.
"""
def _add_regex(self, string, flags):
"""
Adds a new regex or re-uses it from the cache.
"""
self.regex = re_cache.get(string, None)
if not self.regex:
self.regex = re.compile(string, flags=flags)
if self.cache:
re_cache[string] = self.regex
def __init__(self, string, cache=True, flags=0):
"""
Compile a regular expression and initialize internal vars.
"""
self.cache = cache
self.last_match = None
self._add_regex(string, flags)
def __str__(self):
"""
Return the regular expression pattern.
"""
return self.regex.pattern
def __add__(self, other):
"""
Allows adding two regular expressions into one.
"""
return KernRe(str(self) + str(other), cache=self.cache or other.cache,
flags=self.regex.flags | other.regex.flags)
def match(self, string):
"""
Handles a re.match storing its results
"""
self.last_match = self.regex.match(string)
return self.last_match
def search(self, string):
"""
Handles a re.search storing its results
"""
self.last_match = self.regex.search(string)
return self.last_match
def findall(self, string):
"""
Alias to re.findall
"""
return self.regex.findall(string)
def split(self, string):
"""
Alias to re.split
"""
return self.regex.split(string)
def sub(self, sub, string, count=0):
"""
Alias to re.sub
"""
return self.regex.sub(sub, string, count=count)
def group(self, num):
"""
Returns the group results of the last match
"""
return self.last_match.group(num)
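# A minimal usage sketch (illustrative only):
#
#   r = KernRe(r"^#\s*define\s+(\w+)")
#   if r.match("#define FOO 1"):
#       print(r.group(1))                    # -> FOO
#
#   # Two expressions can be concatenated; flags are OR-ed together:
#   r2 = KernRe(r"^\s*") + KernRe(r"struct\s+(\w+)")
#   print(r2.sub(r"\1", "  struct foo"))     # -> foo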
class NestedMatch:
"""
Finding nested delimiters is hard with regular expressions. It is
even harder in Python with its standard re module, as several
advanced regular expression features are missing.
This is the case for this pattern:
'\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;'
which is used to properly match the open/close parentheses of a
STRUCT_GROUP() occurrence.
This class counts pairs of delimiters, using the count to match and
replace nested expressions.
The original approach was suggested by:
https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex
although it was re-implemented to make it more generic and match three
types of delimiters. The logic checks whether delimiters are paired;
if not, the search string is ignored.
"""
# TODO: make NestedMatch handle multiple match groups
#
# Right now, regular expressions to match it are defined only up to
# the start delimiter, e.g.:
#
# \bSTRUCT_GROUP\(
#
# is similar to: STRUCT_GROUP\((.*)\)
# except that the content inside the match group is aligned with the
# delimiters.
#
# The content inside the parentheses is converted into a single
# replace group (e.g. r'\1').
#
# It would be nice to change such a definition to support multiple
# match groups, allowing a regex equivalent to:
#
# FOO\((.*), (.*), (.*)\)
#
# It is probably easier to define it not as a regular expression, but
# with some lexical definition like:
#
# FOO(arg1, arg2, arg3)
DELIMITER_PAIRS = {
'{': '}',
'(': ')',
'[': ']',
}
RE_DELIM = re.compile(r'[\{\}\[\]\(\)]')
def _search(self, regex, line):
"""
Finds paired blocks for a regex that ends with a delimiter.
The suggestion of using finditer to match pairs came from:
https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex
but this ended up using a different implementation, in order to handle
all three types of delimiters and to seek for an initial regular
expression.
The algorithm seeks open/close paired delimiters and places them
on a stack, yielding the start/stop positions of each match once the
stack empties.
The algorithm should work fine for properly paired lines, but it will
silently ignore end delimiters that precede a start delimiter.
This should be OK for the kernel-doc parser, as unpaired delimiters
would cause compilation errors, so we don't need to raise exceptions
to cover such issues.
"""
stack = []
for match_re in regex.finditer(line):
start = match_re.start()
offset = match_re.end()
d = line[offset - 1]
if d not in self.DELIMITER_PAIRS:
continue
end = self.DELIMITER_PAIRS[d]
stack.append(end)
for match in self.RE_DELIM.finditer(line[offset:]):
pos = match.start() + offset
d = line[pos]
if d in self.DELIMITER_PAIRS:
end = self.DELIMITER_PAIRS[d]
stack.append(end)
continue
# Does the end delimiter match what is expected?
if stack and d == stack[-1]:
stack.pop()
if not stack:
yield start, offset, pos + 1
break
def search(self, regex, line):
"""
This is similar to re.search:
It matches a regex that is followed by a delimiter,
returning occurrences only if all delimiters are paired.
"""
for t in self._search(regex, line):
yield line[t[0]:t[2]]
def sub(self, regex, sub, line, count=0):
"""
This is similar to re.sub:
It matches a regex that is followed by a delimiter, replacing
occurrences only if all delimiters are paired.
If r'\1' is used, it works just like re.sub: the matched paired
data, with the delimiters stripped, is placed there.
If count is nonzero, it will replace at most count items.
"""
out = ""
cur_pos = 0
n = 0
for start, end, pos in self._search(regex, line):
out += line[cur_pos:start]
# Value, ignoring start/end delimiters
value = line[end:pos - 1]
# replaces \1 at the sub string, if \1 is used there
new_sub = sub
new_sub = new_sub.replace(r'\1', value)
out += new_sub
# Drop end ';' if any
if pos < len(line) and line[pos] == ';':
pos += 1
cur_pos = pos
n += 1
if count and n >= count:
break
# Append the remaining string
out += line[cur_pos:]
return out
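# A minimal usage sketch (illustrative only). The regular expression
# must end at the opening delimiter:
#
#   nested = NestedMatch()
#   line = "struct foo { STRUCT_GROUP(int a; int b;); int c; };"
#   regex = re.compile(r"\bSTRUCT_GROUP\(")
#
#   for group in nested.search(regex, line):
#       print(group)            # -> STRUCT_GROUP(int a; int b;)
#
#   print(nested.sub(regex, r"\1", line))
#                               # -> struct foo { int a; int b; int c; };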


@@ -0,0 +1,167 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-only
# Copyright (C) Akira Yokosawa, 2024
#
# Ported to Python by (c) Mauro Carvalho Chehab, 2025
"""
Detect problematic Noto CJK variable fonts.
For "make pdfdocs", reports of build errors of translations.pdf started
arriving early 2024 [1, 2]. It turned out that Fedora and openSUSE
tumbleweed have started deploying variable-font [3] format of "Noto CJK"
fonts [4, 5]. For PDF, a LaTeX package named xeCJK is used for CJK
(Chinese, Japanese, Korean) pages. xeCJK requires XeLaTeX/XeTeX, which
does not (and likely never will) understand variable fonts for historical
reasons.
The build error happens even when both variable- and non-variable-format
fonts are found on the build system. To make matters worse, Fedora lists
variable "Noto CJK" fonts in the requirements of langpacks-ja, -ko, -zh_CN,
-zh_TW, etc. Hence developers who have an interest in CJK pages are more
likely to encounter the build errors.
This script is invoked from the error path of "make pdfdocs" and emits
suggestions if variable-font files of "Noto CJK" fonts are in the list of
fonts accessible from XeTeX.
References:
[1]: https://lore.kernel.org/r/8734tqsrt7.fsf@meer.lwn.net/
[2]: https://lore.kernel.org/r/1708585803.600323099@f111.i.mail.ru/
[3]: https://en.wikipedia.org/wiki/Variable_font
[4]: https://fedoraproject.org/wiki/Changes/Noto_CJK_Variable_Fonts
[5]: https://build.opensuse.org/request/show/1157217
#===========================================================================
Workarounds for building translations.pdf
#===========================================================================
* Denylist "variable font" Noto CJK fonts.
- Create $HOME/deny-vf/fontconfig/fonts.conf from the template below,
with tweaks if necessary.
- Path of fontconfig/fonts.conf can be overridden by setting an env
variable FONTS_CONF_DENY_VF.
* Template:
-----------------------------------------------------------------
<?xml version="1.0"?>
<!DOCTYPE fontconfig SYSTEM "urn:fontconfig:fonts.dtd">
<fontconfig>
<!--
Ignore variable-font glob (not to break xetex)
-->
<selectfont>
<rejectfont>
<!--
for Fedora
-->
<glob>/usr/share/fonts/google-noto-*-cjk-vf-fonts</glob>
<!--
for openSUSE tumbleweed
-->
<glob>/usr/share/fonts/truetype/Noto*CJK*-VF.otf</glob>
</rejectfont>
</selectfont>
</fontconfig>
-----------------------------------------------------------------
The denylisting is activated for "make pdfdocs".
* For skipping CJK pages in PDF
- Uninstall texlive-xecjk.
Denylisting is not needed in this case.
* For printing CJK pages in PDF
- Need non-variable "Noto CJK" fonts.
* Fedora
- google-noto-sans-cjk-fonts
- google-noto-serif-cjk-fonts
* openSUSE tumbleweed
- Non-variable "Noto CJK" fonts are not available as distro packages
as of April 2024. Fetch a set of font files from the upstream Noto
CJK Font released at:
https://github.com/notofonts/noto-cjk/tree/main/Sans#super-otc
and at:
https://github.com/notofonts/noto-cjk/tree/main/Serif#super-otc
, then uncompress and deploy them.
- Remember to update fontconfig cache by running fc-cache.
!!! Caution !!!
Uninstalling "variable font" packages can be dangerous.
They might be depended upon by other packages important for your work.
Denylisting should be less invasive, as it is effective only while
XeLaTeX runs in "make pdfdocs".
"""
import os
import re
import subprocess
import textwrap
import sys
class LatexFontChecker:
"""
Detect problems with CJK variable fonts that affect PDF builds for
translations.
"""
def __init__(self, deny_vf=None):
if not deny_vf:
deny_vf = os.environ.get('FONTS_CONF_DENY_VF', "~/deny-vf")
self.environ = os.environ.copy()
self.environ['XDG_CONFIG_HOME'] = os.path.expanduser(deny_vf)
self.re_cjk = re.compile(r"([^:]+):\s*Noto\s+(Sans|Sans Mono|Serif) CJK")
def description(self):
return __doc__
def get_noto_cjk_vf_fonts(self):
"""Get Noto CJK fonts"""
cjk_fonts = set()
cmd = ["fc-list", ":", "file", "family", "variable"]
try:
result = subprocess.run(cmd, stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
universal_newlines=True,
env=self.environ,
check=True)
except subprocess.CalledProcessError as exc:
sys.exit(f"Error running fc-list: {repr(exc)}")
for line in result.stdout.splitlines():
if 'variable=True' not in line:
continue
match = self.re_cjk.search(line)
if match:
cjk_fonts.add(match.group(1))
return sorted(cjk_fonts)
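# A typical matching line from fc-list looks like this (illustrative
# only; file paths vary per distribution):
#   /usr/share/fonts/google-noto-sans-cjk-vf-fonts/NotoSansCJK-VF.ttc: Noto Sans CJK JP:variable=True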
def check(self):
"""Check for problems with CJK fonts"""
fonts = textwrap.indent("\n".join(self.get_noto_cjk_vf_fonts()), " ")
if not fonts:
return None
msg = "=" * 77 + "\n"
msg += 'XeTeX is confused by "variable font" files listed below:\n'
msg += fonts + "\n"
msg += textwrap.dedent(f"""
For CJK pages in PDF, they need to be hidden from XeTeX by denylisting.
Or, CJK pages can be skipped by uninstalling texlive-xecjk.
For more info on denylisting, other options, and variable font, run:
tools/docs/check-variable-fonts.py -h
""")
msg += "=" * 77
return msg
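# A minimal usage sketch (illustrative only):
#
#   checker = LatexFontChecker()
#   msg = checker.check()
#   if msg:
#       print(msg)   # emit denylisting suggestions on "make pdfdocs" errors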


@@ -0,0 +1,482 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2016-2025 by Mauro Carvalho Chehab <mchehab@kernel.org>.
# pylint: disable=R0912,R0915
"""
Parse a source file or header, creating reStructuredText cross-references.
It accepts an optional file to change the default symbol reference or to
suppress symbols from the output.
It is capable of identifying defines, functions, structs, typedefs,
enums and enum symbols, creating cross-references for all of them.
It is also capable of distinguishing a #define used for specifying a
Linux ioctl.
The optional rules file contains a set of rules like:
ignore ioctl VIDIOC_ENUM_FMT
replace ioctl VIDIOC_DQBUF vidioc_qbuf
replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
"""
import os
import re
import sys
class ParseDataStructs:
"""
Creates an enriched version of a Kernel header file with cross-links
to each C data structure type.
It is meant to allow more comprehensive documentation, where uAPI
headers gain cross-reference links to the code.
It is capable of identifying defines, functions, structs, typedefs,
enums and enum symbols, creating cross-references for all of them.
It is also capable of distinguishing a #define used for specifying a
Linux ioctl.
By default, it creates rules for all symbols and defines, but it also
allows parsing an exceptions file. Such a file contains a set of rules
using the syntax below:
1. Ignore rules:
ignore <type> <symbol>
Removes the symbol from reference generation.
2. Replace rules:
replace <type> <old_symbol> <new_reference>
Replaces the reference for old_symbol with new_reference. The new_reference can be:
- A simple symbol name;
- A full Sphinx reference.
3. Namespace rules
namespace <namespace>
Sets C namespace to be used during cross-reference generation. Can
be overridden by replace rules.
On ignore and replace rules, <type> can be:
- ioctl: for defines that end with _IO*, e.g. ioctl definitions
- define: for other defines
- symbol: for symbols defined within enums;
- typedef: for typedefs;
- enum: for the name of a non-anonymous enum;
- struct: for structs.
Examples:
ignore define __LINUX_MEDIA_H
ignore ioctl VIDIOC_ENUM_FMT
replace ioctl VIDIOC_DQBUF vidioc_qbuf
replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
namespace MC
"""
# Parser regexes with multiple ways to capture enums and structs
RE_ENUMS = [
re.compile(r"^\s*enum\s+([\w_]+)\s*\{"),
re.compile(r"^\s*enum\s+([\w_]+)\s*$"),
re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*\{"),
re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*$"),
]
RE_STRUCTS = [
re.compile(r"^\s*struct\s+([_\w][\w\d_]+)\s*\{"),
re.compile(r"^\s*struct\s+([_\w][\w\d_]+)$"),
re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)\s*\{"),
re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)$"),
]
# FIXME: the original code was written long before the Sphinx C
# domain gained support for multiple namespaces. To avoid too much
# churn in the existing hyperlinks, the code kept using "c:type"
# instead of the right types. To change that, we need to change the
# types not only here, but also in the uAPI media documentation.
DEF_SYMBOL_TYPES = {
"ioctl": {
"prefix": "\\ ",
"suffix": "\\ ",
"ref_type": ":ref",
"description": "IOCTL Commands",
},
"define": {
"prefix": "\\ ",
"suffix": "\\ ",
"ref_type": ":ref",
"description": "Macros and Definitions",
},
# We're calling each definition inside an enum as "symbol"
"symbol": {
"prefix": "\\ ",
"suffix": "\\ ",
"ref_type": ":ref",
"description": "Enumeration values",
},
"typedef": {
"prefix": "\\ ",
"suffix": "\\ ",
"ref_type": ":c:type",
"description": "Type Definitions",
},
# This is the description of the enum itself
"enum": {
"prefix": "\\ ",
"suffix": "\\ ",
"ref_type": ":c:type",
"description": "Enumerations",
},
"struct": {
"prefix": "\\ ",
"suffix": "\\ ",
"ref_type": ":c:type",
"description": "Structures",
},
}
def __init__(self, debug: int = 0):
"""Initialize internal vars"""
self.debug = debug
self.data = ""
self.symbols = {}
self.namespace = None
self.exc_name = None
self.ignore = []
self.replace = []
for symbol_type in self.DEF_SYMBOL_TYPES:
self.symbols[symbol_type] = {}
def read_exceptions(self, fname: str):
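"""Read and store the rules from an exceptions file."""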
if not fname:
return
name = self.exc_name = os.path.basename(fname)
with open(fname, "r", encoding="utf-8", errors="backslashreplace") as f:
for ln, line in enumerate(f):
ln += 1
line = line.strip()
if not line or line.startswith("#"):
continue
# ignore rules
match = re.match(r"^ignore\s+(\w+)\s+(\S+)", line)
if match:
self.ignore.append((ln, match.group(1), match.group(2)))
continue
# replace rules
match = re.match(r"^replace\s+(\S+)\s+(\S+)\s+(\S+)", line)
if match:
self.replace.append((ln, match.group(1), match.group(2),
match.group(3)))
continue
match = re.match(r"^namespace\s+(\S+)", line)
if match:
self.namespace = match.group(1)
continue
sys.exit(f"{name}:{ln}: invalid line: {line}")
def apply_exceptions(self):
"""
Process exceptions file with rules to ignore or replace references.
"""
# Handle ignore rules
for ln, c_type, symbol in self.ignore:
if c_type not in self.DEF_SYMBOL_TYPES:
sys.exit(f"{name}:{ln}: {c_type} is invalid")
d = self.symbols[c_type]
if symbol in d:
del d[symbol]
# Handle replace rules
for ln, c_type, old, new in self.replace:
if c_type not in self.DEF_SYMBOL_TYPES:
sys.exit(f"{name}:{ln}: {c_type} is invalid")
reftype = None
# Parse reference type when the type is specified
match = re.match(r"^\:c\:(\w+)\:\`(.+)\`", new)
if match:
reftype = f":c:{match.group(1)}"
new = match.group(2)
else:
match = re.search(r"(\:ref)\:\`(.+)\`", new)
if match:
reftype = match.group(1)
new = match.group(2)
# If the replacement rule doesn't have a type, get default
if not reftype:
reftype = self.DEF_SYMBOL_TYPES[c_type].get("ref_type")
if not reftype:
reftype = self.DEF_SYMBOL_TYPES[c_type].get("real_type")
new_ref = f"{reftype}:`{old} <{new}>`"
# Change self.symbols to use the replacement rule
if old in self.symbols[c_type]:
(_, ln) = self.symbols[c_type][old]
self.symbols[c_type][old] = (new_ref, ln)
else:
print(f"{name}:{ln}: Warning: can't find {old} {c_type}")
def store_type(self, ln, symbol_type: str, symbol: str,
ref_name: str = None, replace_underscores: bool = True):
"""
Stores a new symbol at self.symbols under symbol_type.
By default, underscores are replaced by "-"
"""
defs = self.DEF_SYMBOL_TYPES[symbol_type]
prefix = defs.get("prefix", "")
suffix = defs.get("suffix", "")
ref_type = defs.get("ref_type")
# Determine ref_link based on symbol type
if ref_type or self.namespace:
if not ref_name:
ref_name = symbol.lower()
# c-type references don't support hash
if ref_type == ":ref" and replace_underscores:
ref_name = ref_name.replace("_", "-")
# C domain references may have namespaces
if ref_type and ref_type.startswith(":c:"):
if self.namespace:
ref_name = f"{self.namespace}.{ref_name}"
if ref_type:
ref_link = f"{ref_type}:`{symbol} <{ref_name}>`"
else:
ref_link = f"`{symbol} <{ref_name}>`"
else:
ref_link = symbol
self.symbols[symbol_type][symbol] = (f"{prefix}{ref_link}{suffix}", ln)
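# Illustrative examples of the stored references (assumed inputs):
#   store_type(10, "struct", "v4l2_format") stores
#       "\ :c:type:`v4l2_format <v4l2_format>`\ "
#   store_type(20, "ioctl", "VIDIOC_QBUF", replace_underscores=False)
#       stores "\ :ref:`VIDIOC_QBUF <vidioc_qbuf>`\ "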
def store_line(self, line):
"""Stores a line at self.data, properly indented"""
line = " " + line.expandtabs()
self.data += line.rstrip(" ")
def parse_file(self, file_in: str, exceptions: str = None):
"""Reads a C source file and get identifiers"""
self.data = ""
is_enum = False
is_comment = False
multiline = ""
self.read_exceptions(exceptions)
with open(file_in, "r",
encoding="utf-8", errors="backslashreplace") as f:
for line_no, line in enumerate(f):
self.store_line(line)
line = line.strip("\n")
# Handle continuation lines
if line.endswith(r"\\"):
multiline += line[-1]
continue
if multiline:
line = multiline + line
multiline = ""
# Handle comments. They can be multilined
if not is_comment:
if re.search(r"/\*.*", line):
is_comment = True
else:
# Strip C99-style comments
line = re.sub(r"(//.*)", "", line)
if is_comment:
if re.search(r".*\*/", line):
is_comment = False
else:
multiline = line
continue
# At this point, the line variable may hold a multi-line statement,
# if lines ended with \ or contained multi-line comments.
# With that, entire comments can safely be removed, and there's
# no need to use re.DOTALL for the logic below
line = re.sub(r"(/\*.*\*/)", "", line)
if not line.strip():
continue
# It can be useful for debug purposes to print the file after
# having comments stripped and multi-lines grouped.
if self.debug > 1:
print(f"line {line_no + 1}: {line}")
# Now the fun begins: parse each type and store it.
# We opted for a two-pass logic here because:
# 1. it makes it easier to debug symbols that were not parsed;
# 2. we want symbol replacement over the entire content, not
# just where the symbol is detected.
if is_enum:
match = re.match(r"^\s*([_\w][\w\d_]+)\s*[\,=]?", line)
if match:
self.store_type(line_no, "symbol", match.group(1))
if "}" in line:
is_enum = False
continue
match = re.match(r"^\s*#\s*define\s+([\w_]+)\s+_IO", line)
if match:
self.store_type(line_no, "ioctl", match.group(1),
replace_underscores=False)
continue
match = re.match(r"^\s*#\s*define\s+([\w_]+)(\s+|$)", line)
if match:
self.store_type(line_no, "define", match.group(1))
continue
match = re.match(r"^\s*typedef\s+([_\w][\w\d_]+)\s+(.*)\s+([_\w][\w\d_]+);",
line)
if match:
name = match.group(2).strip()
symbol = match.group(3)
self.store_type(line_no, "typedef", symbol, ref_name=name)
continue
for re_enum in self.RE_ENUMS:
match = re_enum.match(line)
if match:
self.store_type(line_no, "enum", match.group(1))
is_enum = True
break
for re_struct in self.RE_STRUCTS:
match = re_struct.match(line)
if match:
self.store_type(line_no, "struct", match.group(1))
break
self.apply_exceptions()
def debug_print(self):
"""
Print debug information containing the replacement rules per symbol.
To make checking easier, group them per type.
"""
if not self.debug:
return
for c_type, refs in self.symbols.items():
if not refs: # Skip empty dictionaries
continue
print(f"{c_type}:")
for symbol, (ref, ln) in sorted(refs.items()):
print(f" #{ln:<5d} {symbol} -> {ref}")
print()
def gen_output(self):
"""Write the formatted output to a file."""
# Avoid extra blank lines
text = re.sub(r"\s+$", "", self.data) + "\n"
text = re.sub(r"\n\s+\n", "\n\n", text)
# Escape Sphinx special characters
text = re.sub(r"([\_\`\*\<\>\&\\\\:\/\|\%\$\#\{\}\~\^])", r"\\\1", text)
# Source uAPI files may have special notes. Use bold font for them
text = re.sub(r"DEPRECATED", "**DEPRECATED**", text)
# Delimiters to catch the entire symbol after escaped
start_delim = r"([ \n\t\(=\*\@])"
end_delim = r"(\s|,|\\=|\\:|\;|\)|\}|\{)"
# Process all reference types
for ref_dict in self.symbols.values():
for symbol, (replacement, _) in ref_dict.items():
symbol = re.escape(re.sub(r"([\_\`\*\<\>\&\\\\:\/])", r"\\\1", symbol))
text = re.sub(fr'{start_delim}{symbol}{end_delim}',
fr'\1{replacement}\2', text)
# Remove "\ " where not needed: before spaces and at the end of lines
text = re.sub(r"\\ ([\n ])", r"\1", text)
text = re.sub(r" \\ ", " ", text)
return text
def gen_toc(self):
"""
Create a list of symbols to be part of a table of contents
"""
text = []
# Sort symbol types per description
symbol_descriptions = []
for k, v in self.DEF_SYMBOL_TYPES.items():
symbol_descriptions.append((v['description'], k))
symbol_descriptions.sort()
# Process each category
for description, c_type in symbol_descriptions:
refs = self.symbols[c_type]
if not refs: # Skip empty categories
continue
text.append(f"{description}")
text.append("-" * len(description))
text.append("")
# Sort symbols alphabetically
for symbol, (ref, ln) in sorted(refs.items()):
text.append(f"- LINENO_{ln}: {ref}")
text.append("") # Add empty line between categories
return "\n".join(text)
def write_output(self, file_in: str, file_out: str, toc: bool):
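"""Write the formatted output (TOC or full text) to a file."""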
title = os.path.basename(file_in)
if toc:
text = self.gen_toc()
else:
text = self.gen_output()
with open(file_out, "w", encoding="utf-8", errors="backslashreplace") as f:
f.write(".. -*- coding: utf-8; mode: rst -*-\n\n")
f.write(f"{title}\n")
f.write("=" * len(title) + "\n\n")
if not toc:
f.write(".. parsed-literal::\n\n")
f.write(text)
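# A minimal usage sketch (illustrative only; the file names are
# hypothetical):
#
#   parser = ParseDataStructs()
#   parser.parse_file("include/uapi/linux/media.h",
#                     exceptions="media.h.rst.exceptions")
#   parser.write_output("include/uapi/linux/media.h",
#                       "media.h.rst", toc=False)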


@@ -0,0 +1,178 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-or-later
# Copyright (c) 2017-2025 Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
"""
Handle Python version check logic.
Not all Python versions are supported by scripts. Yet, in some cases,
like during documentation builds, a newer version of Python may be
available.
This class allows checking whether the minimal requirements are met.
Better than that, PythonVersion.check_python() not only checks the
minimal requirements, but also automatically switches to the newest
available Python version if present.
"""
import os
import re
import subprocess
import shlex
import sys
from glob import glob
from textwrap import indent
class PythonVersion:
"""
Ancillary methods to check the current Python version and, when it
is too old, re-run the script with a newer interpreter, if available.
"""
def __init__(self, version):
"""Ïnitialize self.version tuple from a version string"""
self.version = self.parse_version(version)
@staticmethod
def parse_version(version):
"""Convert a major.minor.patch version into a tuple"""
return tuple(int(x) for x in version.split("."))
@staticmethod
def ver_str(version):
"""Returns a version tuple as major.minor.patch"""
return ".".join([str(x) for x in version])
@staticmethod
def cmd_print(cmd, max_len=80):
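"""Format a command line for printing, wrapping lines at max_len."""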
cmd_line = []
for w in cmd:
w = shlex.quote(w)
if cmd_line:
if not max_len or len(cmd_line[-1]) + len(w) < max_len:
cmd_line[-1] += " " + w
continue
else:
cmd_line[-1] += " \\"
cmd_line.append(w)
else:
cmd_line.append(w)
return "\n ".join(cmd_line)
def __str__(self):
"""Returns a version tuple as major.minor.patch from self.version"""
return self.ver_str(self.version)
@staticmethod
def get_python_version(cmd):
"""
Get the version of a Python binary. As we need to detect whether
newer python binaries are available, we can't rely on the running
interpreter's version info here.
"""
kwargs = {}
if sys.version_info < (3, 7):
kwargs['universal_newlines'] = True
else:
kwargs['text'] = True
result = subprocess.run([cmd, "--version"],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
**kwargs, check=False)
version = result.stdout.strip()
match = re.search(r"(\d+\.\d+\.\d+)", version)
if match:
return PythonVersion.parse_version(match.group(1))
print(f"Can't parse version {version}")
return (0, 0, 0)
@staticmethod
def find_python(min_version):
"""
Detect whether there is any python 3.xy version newer than the
current one.
Note: this routine is limited to up to 2 digits for python3. We
may need to update it one day, hopefully in a distant future.
"""
patterns = [
"python3.[0-9][0-9]",
"python3.[0-9]",
]
python_cmd = []
# Seek for a python binary newer than min_version
for path in os.getenv("PATH", "").split(":"):
for pattern in patterns:
for cmd in glob(os.path.join(path, pattern)):
if os.path.isfile(cmd) and os.access(cmd, os.X_OK):
version = PythonVersion.get_python_version(cmd)
if version >= min_version:
python_cmd.append((version, cmd))
return sorted(python_cmd, reverse=True)
@staticmethod
def check_python(min_version, show_alternatives=False, bail_out=False,
success_on_error=False):
"""
Check if the current python binary satisfies our minimal requirement
for the Sphinx build. If not, re-run with a newer version if one is found.
"""
cur_ver = sys.version_info[:3]
if cur_ver >= min_version:
return
python_ver = PythonVersion.ver_str(cur_ver)
available_versions = PythonVersion.find_python(min_version)
if not available_versions:
print(f"ERROR: Python version {python_ver} is not spported anymore\n")
print(" Can't find a new version. This script may fail")
return
script_path = os.path.abspath(sys.argv[0])
# Pick the newest available version
new_python_cmd = available_versions[0][1]
if show_alternatives:
print("You could run, instead:")
for _, cmd in available_versions:
args = [cmd, script_path] + sys.argv[1:]
cmd_str = indent(PythonVersion.cmd_print(args), " ")
print(f"{cmd_str}\n")
if bail_out:
msg = f"Python {python_ver} not supported. Bailing out"
if success_on_error:
print(msg, file=sys.stderr)
sys.exit(0)
else:
sys.exit(msg)
print(f"Python {python_ver} not supported. Changing to {new_python_cmd}")
# Restart script using the newer version
args = [new_python_cmd, script_path] + sys.argv[1:]
try:
os.execv(new_python_cmd, args)
except OSError as e:
sys.exit(f"Failed to restart with {new_python_cmd}: {e}")