diff options
| author | David Fussner <dfussner@googlemail.com> | 2024-06-10 14:16:04 +0100 |
|---|---|---|
| committer | Stefan Kangas <stefankangas@gmail.com> | 2024-09-14 17:05:33 +0200 |
| commit | b44c00669ace7b9e6a90aecb5f4e9f4edf6ed25a (patch) | |
| tree | 7119d2ff1695271877a27a1a0cf0b0d648983d1c /lib-src | |
| parent | 98e582e74a2bbc2c7fdef02b8cd90036fa217712 (diff) | |
Provide a modified xref backend for TeX buffers
In addition to providing a new `xref' backend, the patch also improves
the general handling of expl3 syntax. Expl3 is the next-generation
LaTeX specification, and has for some time been available by default in
the LaTeX kernel. The new syntax co-exists in many files with the
standard LaTeX2e syntax, so we try at least minimally to separate the
way modes handle the two specifications, both to reduce
visually-disturbing interference between them and also to improve the
`xref' backend. (Bug#53749)
* lib-src/etags.c (TeX_commands): Improve parsing of commands in TeX
buffers.
(TEX_defenv): Expand list of commands to tag by default in TeX buffers.
(TeX_help):
* doc/emacs/maintaining.texi (Tag Syntax): Document new tagged commands.
(Identifier Search): Add note about semantic-symref-filepattern-alist,
auto-mode-alist, and xref-find-references.
* lisp/textmodes/tex-mode.el (tex-font-lock-suscript): Test for
underscore in expl3 files and regions, disable subscript face there.
(tex-common-initialization): Set up xref backend for in-tree TeX modes.
Detect expl3 files, and in others set up a list of expl3 regions.
(tex-expl-buffer-parse): New function called in previous.
(tex-expl-buffer-p): New variable to hold the result of previous.
(tex-expl-region-set): New function added to
'syntax-propertize-extend-region-functions' hook.
(tex-expl-region-list): New variable to hold the result of previous.
(tex--xref-backend): New function to identify the xref backend.
(tex--thing-at-point, tex-thingatpt--beginning-of-symbol)
(tex-thingatpt--end-of-symbol, tex--bounds-of-symbol-at-point):
New functions to return 'thing-at-point' for xref backend.
(tex-thingatpt-exclude-chars): New variable to do the same.
(xref-backend-identifier-at-point): New TeX backend method to provide
symbols for processing by xref.
(xref-backend-identifier-completion-table)
(xref-backend-identifier-completion-ignore-case)
(xref-backend-definitions, xref-backend-apropos): Placeholders to
call the standard 'etags' xref backend methods.
(xref-backend-references): Wrapper to call the default xref backend
method, finding as many relevant files as possible and using a bespoke
syntax-propertize-function when required.
(tex--collect-file-extensions, tex-xref-syntax-function): Helper
functions for previous.
(tex-find-references-syntax-table, tex--buffers-list)
(tex--xref-syntax-fun, tex--old-syntax-function): New variables for
the same.
Diffstat (limited to 'lib-src')
| -rw-r--r-- | lib-src/etags.c | 186 |
1 files changed, 171 insertions, 15 deletions
diff --git a/lib-src/etags.c b/lib-src/etags.c index 556b7d701fc..7f652790261 100644 --- a/lib-src/etags.c +++ b/lib-src/etags.c @@ -793,11 +793,27 @@ variables set with 'set!' at top level in the file."; static const char *TeX_suffixes [] = { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL }; static const char TeX_help [] = -"In LaTeX text, the argument of any of the commands '\\chapter',\n\ -'\\section', '\\subsection', '\\subsubsection', '\\eqno', '\\label',\n\ -'\\ref', '\\cite', '\\bibitem', '\\part', '\\appendix', '\\entry',\n\ -'\\index', '\\def', '\\newcommand', '\\renewcommand',\n\ -'\\newenvironment' or '\\renewenvironment' is a tag.\n\ +"In LaTeX text, the argument of the commands '\\chapter', '\\section',\n\ +'\\subsection', '\\subsubsection', '\\eqno', '\\label', '\\ref',\n\ +'\\Ref', '\\footref', '\\cite', '\\bibitem', '\\part', '\\appendix',\n\ +'\\entry', '\\index', '\\def', '\\edef', '\\gdef', '\\xdef',\n\ +'\\newcommand', '\\renewcommand', '\\newrobustcmd', '\\renewrobustcmd',\n\ +'\\newenvironment', '\\renewenvironment', '\\DeclareRobustCommand',\n\ +'\\providecommand', '\\providerobustcmd', '\\NewDocumentCommand',\n\ +'\\RenewDocumentCommand', '\\ProvideDocumentCommand',\n\ +'\\DeclareDocumentCommand', '\\NewExpandableDocumentCommand',\n\ +'\\RenewExpandableDocumentCommand', '\\ProvideExpandableDocumentCommand',\n\ +'\\DeclareExpandableDocumentCommand', '\\NewDocumentEnvironment',\n\ +'\\RenewDocumentEnvironment', '\\ProvideDocumentEnvironment',\n\ +'\\DeclareDocumentEnvironment','\\csdef', '\\csedef', '\\csgdef',\n\ +'\\csxdef', '\\csletcs', '\\cslet', '\\letcs', '\\let',\n\ +'\\cs_new_protected_nopar', '\\cs_new_protected', '\\cs_new_nopar',\n\ +'\\cs_new_eq', '\\cs_new', '\\cs_set_protected_nopar',\n\ +'\\cs_set_protected', '\\cs_set_nopar', '\\cs_set_eq', '\\cs_set',\n\ +'\\cs_gset_protected_nopar', '\\cs_gset_protected', '\\cs_gset_nopar',\n\ +'\\cs_gset_eq', '\\cs_gset', '\\cs_generate_from_arg_count', or\n\ +'\\cs_generate_variant' is a tag. So is the argument of any starred\n\ +variant of these commands.\n\ \n\ Other commands can be specified by setting the environment variable\n\ 'TEXTAGS' to a colon-separated list like, for example,\n\ @@ -5746,9 +5762,20 @@ static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */ /* Default set of control sequences to put into TEX_toktab. The value of environment var TEXTAGS is prepended to this. */ static const char *TEX_defenv = "\ -:chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\ -:part:appendix:entry:index:def\ -:newcommand:renewcommand:newenvironment:renewenvironment"; +:label:ref:Ref:footref:chapter:section:subsection:subsubsection:eqno:cite\ +:bibitem:part:appendix:entry:index:def:edef:gdef:xdef:newcommand:renewcommand\ +:newenvironment:renewenvironment:DeclareRobustCommand:renewrobustcmd\ +:newrobustcmd:providecommand:providerobustcmd:NewDocumentCommand\ +:RenewDocumentCommand:ProvideDocumentCommand:DeclareDocumentCommand\ +:NewExpandableDocumentCommand:RenewExpandableDocumentCommand\ +:ProvideExpandableDocumentCommand:DeclareExpandableDocumentCommand\ +:NewDocumentEnvironment:RenewDocumentEnvironment\ +:ProvideDocumentEnvironment:DeclareDocumentEnvironment:csdef\ +:csedef:csgdef:csxdef:csletcs:cslet:letcs:let:cs_new_protected_nopar\ +:cs_new_protected:cs_new_nopar:cs_new_eq:cs_new:cs_set_protected_nopar\ +:cs_set_protected:cs_set_nopar:cs_set_eq:cs_set:cs_gset_protected_nopar\ +:cs_gset_protected:cs_gset_nopar:cs_gset_eq:cs_gset\ +:cs_generate_from_arg_count:cs_generate_variant"; static void TEX_decode_env (const char *, const char *); @@ -5807,19 +5834,139 @@ TeX_commands (FILE *inf) { char *p; ptrdiff_t namelen, linelen; - bool opgrp = false; + bool opgrp = false, one_esc = false, is_explthree = false; cp = skip_spaces (cp + key->len); + + /* 1. The canonical expl3 syntax looks something like this: + \cs_new:Npn \__hook_tl_gput:Nn { \ERROR }. First, if we + want to tag any such commands, we include only the part + before the colon (cs_new) in TEX_defenv or TEXTAGS. Second, + etags skips the argument specifier (including the colon) + after the tag token, so that it doesn't become the tag name. + Third, we set the boolean 'is_explthree' to true so that we + can remove the argument specifier from the actual tag name + (__hook_tl_gput). This all allows us to include expl3 + constructs in TEX_defenv or in the environment variable + TEXTAGS without requiring a change of separator, and it also + allows us to find the definition of variant commands (with + different argument specifiers) defined using, for example, + \cs_generate_variant:Nn. Please note that the expl3 spec + requires etags to pay more attention to whitespace in the + code. + + 2. We also automatically remove the asterisk from starred + variants of all commands, without the need to include the + starred commands explicitly in TEX_defenv or TEXTAGS. */ + if (*cp == ':') + { + while (!c_isspace (*cp) && *cp != TEX_opgrp) + cp++; + cp = skip_spaces (cp); + is_explthree = true; + } + else if (*cp == '*') + cp++; + + /* Skip the optional arguments to commands in the tags list so + that these arguments don't end up as the name of the tag. + The name will instead come from the argument in curly braces + that follows the optional ones. The '\let' command gets + special treatment. */ + while (*cp != '\0' && *cp != '%' + && !streq (key->buffer, "let")) + { + if (*cp == '[') + { + while (*cp != ']' && *cp != '\0' && *cp != '%') + cp++; + } + else if (*cp == '(') + { + while (*cp != ')' && *cp != '\0' && *cp != '%') + cp++; + } + else if (*cp == ']' || *cp == ')') + cp++; + else + break; + } if (*cp == TEX_opgrp) { opgrp = true; cp++; + cp = skip_spaces (cp); /* For expl3 code. */ } + + /* Removing the TeX escape character from tag names simplifies + things for editors finding tagged commands in TeX buffers. + This applies to Emacs but also to the tag-finding behavior + of at least some of the editors that use ctags, though in + the latter case this will remain suboptimal. The + undocumented ctags option '--no-duplicates' may help. */ + if (*cp == TEX_esc) + { + cp++; + one_esc = true; + } + + /* Testing !c_isspace && !c_ispunct is simpler, but halts + processing at too many places. The list as it stands tries + both to ensure that tag names will derive from macro names + rather than from optional parameters to those macros, and + also to return findable names while still allowing for + unorthodox constructs. */ for (p = cp; - (!c_isspace (*p) && *p != '#' && - *p != TEX_opgrp && *p != TEX_clgrp); + (!c_isspace (*p) && *p != '#' && *p != '=' && + *p != '[' && *p != '(' && *p != TEX_opgrp && + *p != TEX_clgrp && *p != '"' && *p != '\'' && + *p != '%' && *p != ',' && *p != '|' && *p != '$'); p++) - continue; + /* In expl3 code we remove the argument specification from + the tag name. More generally we allow only one (deleted) + escape char in a tag name, which (primarily) enables + tagging a TeX command's different, possibly temporary, + '\let' bindings. */ + if (is_explthree && *p == ':') + break; + else if (*p == TEX_esc) + { /* Second part of test is for, e.g., \cslet. */ + if (!one_esc && !opgrp) + { + one_esc = true; + continue; + } + else + break; + } + else + continue; + /* For TeX files, tags without a name are basically cruft, and + in some situations they can produce spurious and confusing + matches. Try to catch as many cases as possible where a + command name is of the form '\(', but avoid, as far as + possible, the spurious matches. */ + if (p == cp) + { + switch (*p) + { /* Include =? */ + case '(': case '[': case '"': case '\'': + case '\\': case '!': case '=': case ',': + case '|': case '$': + p++; + break; + case '{': case '}': case '<': case '>': + if (!opgrp) + { + p++; + if (*p == '\0' || *p == '%') + goto tex_next_line; + } + break; + default: + break; + } + } namelen = p - cp; linelen = lb.len; if (!opgrp || *p == TEX_clgrp) @@ -5828,9 +5975,18 @@ TeX_commands (FILE *inf) p++; linelen = p - lb.buffer + 1; } - make_tag (cp, namelen, true, - lb.buffer, linelen, lineno, linecharno); - goto tex_next_line; /* We only tag a line once */ + if (namelen) + make_tag (cp, namelen, true, + lb.buffer, linelen, lineno, linecharno); + /* Lines with more than one \def or \let are surprisingly + common in TeX files, especially in the system files that + form the basis of the various TeX formats. This tags them + all. */ + /* goto tex_next_line; /\* We only tag a line once *\/ */ + while (*cp != '\0' && *cp != '%' && *cp != TEX_esc) + cp++; + if (*cp != TEX_esc) + goto tex_next_line; } } tex_next_line: |
