diff --git a/doc/admin-guide/configuration/hrw4u.en.rst b/doc/admin-guide/configuration/hrw4u.en.rst index 8a3bed3d862..35af67894ff 100644 --- a/doc/admin-guide/configuration/hrw4u.en.rst +++ b/doc/admin-guide/configuration/hrw4u.en.rst @@ -80,15 +80,56 @@ follows to produce the help output: hrw4u --help -Doing a compile is simply: +Basic Usage +^^^^^^^^^^^ + +Compile a single file to stdout: .. code-block:: none hrw4u some_file.hrw4u -in Addition to ``hrw4u``, you also have the reverse tool, converting existing ``header_rewrite`` -configurations to ``hrw4u``. This tool is named ``u4wrh``. For people using IDEs, the package also -provides an LSP for this language, named ``hrw4u-lsp``. +Compile from stdin: + +.. code-block:: none + + cat some_file.hrw4u | hrw4u + +Compile multiple files to stdout (separated by ``# ---``): + +.. code-block:: none + + hrw4u file1.hrw4u file2.hrw4u file3.hrw4u + +Bulk Compilation +^^^^^^^^^^^^^^^^ + +For bulk compilation, use the ``input:output`` format to compile multiple files +to their respective output files in a single command: + +.. code-block:: none + + hrw4u file1.hrw4u:file1.conf file2.hrw4u:file2.conf file3.hrw4u:file3.conf + +This is particularly useful for build systems or when processing many configuration +files at once. All files are processed in a single invocation, improving performance +for large batches of files. + +Reverse Tool (u4wrh) +^^^^^^^^^^^^^^^^^^^^ + +In addition to ``hrw4u``, you also have the reverse tool, converting existing ``header_rewrite`` +configurations to ``hrw4u``. This tool is named ``u4wrh`` and supports the same usage patterns: + +.. code-block:: none + + # Convert single file to stdout + u4wrh existing_config.conf + + # Bulk conversion + u4wrh file1.conf:file1.hrw4u file2.conf:file2.hrw4u + +For people using IDEs, the package also provides an LSP for this language, named ``hrw4u-lsp``. 
def main() -> None:
    """Run the hrw4u command line tool.

    All work is handed to ``run_main``; this function only supplies the
    hrw4u-specific lexer, parser and visitor classes and the CLI wording.
    """
    cli_config = {
        "description": "Process HRW4U input and produce output (AST or HRW).",
        "lexer_class": hrw4uLexer,
        "parser_class": hrw4uParser,
        "visitor_class": HRW4UVisitor,
        "error_prefix": "hrw4u",
        "output_flag_name": "hrw",
        "output_flag_help": "Produce the HRW output (default)",
    }
    run_main(**cli_config)
# Entry point of the u4wrh script; it shares these source lines with run_main.
def main() -> None:
    """Main entry point for the u4wrh script."""
    run_main(
        description="Process header_rewrite (HRW) lines and reconstruct hrw4u source.",
        lexer_class=u4wrhLexer,
        parser_class=u4wrhParser,
        visitor_class=HRWInverseVisitor,
        error_prefix="u4wrh",
        output_flag_name="hrw4u",
        output_flag_help="Produce reconstructed hrw4u output (default)")


def _read_input_file(input_path: str) -> str:
    """Return the UTF-8 text of ``input_path``, or report the problem on stderr and exit(1)."""
    try:
        with open(input_path, 'r', encoding='utf-8') as input_file:
            return input_file.read()
    except FileNotFoundError:
        print(f"Error: Input file '{input_path}' not found", file=sys.stderr)
    except (OSError, UnicodeError) as e:
        print(f"Error reading '{input_path}': {e}", file=sys.stderr)
    sys.exit(1)


def run_main(
        description: str, lexer_class: type[LexerProtocol], parser_class: type[ParserProtocol],
        visitor_class: type[VisitorProtocol], error_prefix: str, output_flag_name: str, output_flag_help: str) -> None:
    """
    Generic main function for hrw4u and u4wrh scripts with bulk compilation support.

    Three invocation shapes are handled:

    * no positional arguments: read stdin and write to stdout;
    * plain paths: compile each file to stdout, printing ``# ---`` between outputs;
    * ``input:output`` pairs: compile each input into its own output file.

    All positional arguments are checked before any file is read or written,
    so a command line that mixes the two formats fails without leaving
    partially generated output behind.

    Args:
        description: Description for argument parser
        lexer_class: ANTLR lexer class to use
        parser_class: ANTLR parser class to use
        visitor_class: Visitor class to use
        error_prefix: Error prefix for error messages
        output_flag_name: Name of output flag (e.g., "hrw", "hrw4u")
        output_flag_help: Help text for output flag

    Raises:
        SystemExit: With status 1 when formats are mixed or a file cannot be
            read or written. argparse and ``generate_output`` may exit as well.
    """
    import contextlib

    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="For bulk compilation to files, use: input1.txt:output1.txt input2.txt:output2.txt ...")

    parser.add_argument(
        "files", help="Input file(s) to parse. Use input:output for bulk file output (default: stdin to stdout)", nargs="*")

    output_group = parser.add_mutually_exclusive_group()
    output_group.add_argument("--ast", action="store_true", help="Produce the ANTLR parse tree only")
    output_group.add_argument(f"--{output_flag_name}", action="store_true", help=output_flag_help)

    parser.add_argument("--no-comments", action="store_true", help="Skip comment preservation (ignore comments in output)")
    parser.add_argument("--debug", action="store_true", help="Enable debug output")
    parser.add_argument(
        "--stop-on-error", action="store_true", help="Stop processing on first error (default: collect and report multiple errors)")

    args = parser.parse_args()

    # The tool's own output format is the default whenever --ast was not requested.
    # The attribute is always (re)assigned so later code can rely on it existing.
    native_requested = getattr(args, output_flag_name, False)
    setattr(args, output_flag_name, bool(native_requested or not args.ast))

    collect_errors = not args.stop_on_error

    if not args.files:
        content, filename = process_input(sys.stdin)
        tree, parser_obj, error_collector = create_parse_tree(
            content, filename, lexer_class, parser_class, error_prefix, collect_errors)
        generate_output(tree, parser_obj, visitor_class, filename, args, error_collector)
        return

    # NOTE(review): any ':' selects pair mode, so a Windows drive-letter path
    # (C:\...) would be split as input:output -- confirm whether that matters.
    pair_flags = [':' in f for f in args.files]

    if any(pair_flags):
        # Reject mixed usage up front; checking inside the loop would compile and
        # write the leading pairs before the bad argument was noticed.
        if not all(pair_flags):
            print(
                "Error: Mixed formats not allowed. All files must use 'input:output' format for bulk compilation.",
                file=sys.stderr)
            sys.exit(1)

        for pair in args.files:
            input_path, output_path = pair.split(':', 1)
            content = _read_input_file(input_path)

            # Parse before opening the output so a parse-time exit does not truncate it.
            tree, parser_obj, error_collector = create_parse_tree(
                content, input_path, lexer_class, parser_class, error_prefix, collect_errors)

            try:
                with open(output_path, 'w', encoding='utf-8') as output_file, \
                        contextlib.redirect_stdout(output_file):
                    generate_output(tree, parser_obj, visitor_class, input_path, args, error_collector)
            except (OSError, UnicodeError) as e:
                # Only I/O and encoding failures are write errors; anything else
                # raised while generating output propagates as in stdout mode.
                print(f"Error writing to '{output_path}': {e}", file=sys.stderr)
                sys.exit(1)
        return

    for i, input_path in enumerate(args.files):
        if i > 0:
            print("# ---")

        content = _read_input_file(input_path)
        tree, parser_obj, error_collector = create_parse_tree(
            content, input_path, lexer_class, parser_class, error_prefix, collect_errors)
        generate_output(tree, parser_obj, visitor_class, input_path, args, error_collector)
inverse-specific state - self.section_label = section_label + self._section_label = section_label self.preserve_comments = preserve_comments self._pending_terms: list[tuple[str, CondState]] = [] self._in_group: bool = False diff --git a/tools/hrw4u/tests/test_bulk.py b/tools/hrw4u/tests/test_bulk.py new file mode 100644 index 00000000000..2968e9ed519 --- /dev/null +++ b/tools/hrw4u/tests/test_bulk.py @@ -0,0 +1,50 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
@pytest.mark.conds
def test_conds_bulk_compilation() -> None:
    """Bulk-compile the ``conds`` test group through ``utils.run_bulk_test``."""
    group_name = "conds"
    utils.run_bulk_test(group_name)


@pytest.mark.examples
def test_examples_bulk_compilation() -> None:
    """Bulk-compile the ``examples`` test group through ``utils.run_bulk_test``."""
    group_name = "examples"
    utils.run_bulk_test(group_name)


@pytest.mark.hooks
def test_hooks_bulk_compilation() -> None:
    """Bulk-compile the ``hooks`` test group through ``utils.run_bulk_test``."""
    group_name = "hooks"
    utils.run_bulk_test(group_name)


@pytest.mark.ops
def test_ops_bulk_compilation() -> None:
    """Bulk-compile the ``ops`` test group through ``utils.run_bulk_test``."""
    group_name = "ops"
    utils.run_bulk_test(group_name)


@pytest.mark.vars
def test_vars_bulk_compilation() -> None:
    """Bulk-compile the ``vars`` test group through ``utils.run_bulk_test``."""
    group_name = "vars"
    utils.run_bulk_test(group_name)
@pytest.fixture
def sample_hrw4u_files(tmp_path: Path) -> tuple[Path, Path, Path]:
    """Write three small hrw4u sources into ``tmp_path`` and return their paths in order."""
    sources = (
        ("test1.hrw4u", "REMAP { no-op(); }\n"),
        ("test2.hrw4u", "READ_RESPONSE { inbound.resp.X-Test = \"foo\"; }\n"),
        ("test3.hrw4u", "SEND_REQUEST { outbound.req.X-Custom = \"bar\"; }\n"),
    )
    created = []
    for name, text in sources:
        target = tmp_path / name
        target.write_text(text)
        created.append(target)
    first, second, third = created
    return first, second, third


def run_hrw4u(args: list[str], stdin: str | None = None) -> subprocess.CompletedProcess:
    """Invoke ``scripts/hrw4u`` with the current interpreter and capture its text output.

    The script path is resolved against the current working directory, which
    is also used as the child's working directory. ``stdin`` is fed to the
    process when given.
    """
    command = [sys.executable, str(Path("scripts/hrw4u").resolve()), *args]
    return subprocess.run(command, capture_output=True, text=True, input=stdin, cwd=Path.cwd())


def test_cli_single_file_to_stdout(sample_hrw4u_files: tuple[Path, Path, Path]) -> None:
    """A single positional file is compiled and printed on stdout."""
    source = sample_hrw4u_files[0]

    proc = run_hrw4u([str(source)])

    assert proc.returncode == 0
    assert "no-op" in proc.stdout
    assert any(token in proc.stdout for token in ("REMAP", "cond"))


def test_cli_multiple_files_to_stdout(sample_hrw4u_files: tuple[Path, Path, Path]) -> None:
    """Several positional files go to stdout with a ``# ---`` line between them."""
    proc = run_hrw4u([str(path) for path in sample_hrw4u_files])

    assert proc.returncode == 0
    assert "# ---" in proc.stdout
    assert proc.stdout.count("# ---") == 2
    for expected in ("no-op", "X-Test", "X-Custom"):
        assert expected in proc.stdout


def test_cli_stdin_to_stdout() -> None:
    """With no positional arguments the source is read from stdin."""
    source_text = "REMAP { inbound.req.X-Stdin = \"test\"; }\n"

    proc = run_hrw4u([], stdin=source_text)

    assert proc.returncode == 0
    assert "X-Stdin" in proc.stdout


def test_cli_bulk_input_output_pairs(sample_hrw4u_files: tuple[Path, Path, Path], tmp_path: Path) -> None:
    """``input:output`` pairs write each compiled result to its own file."""
    first, second, _ = sample_hrw4u_files
    first_out = tmp_path / "out1.conf"
    second_out = tmp_path / "out2.conf"

    proc = run_hrw4u([f"{first}:{first_out}", f"{second}:{second_out}"])

    assert proc.returncode == 0
    assert first_out.exists()
    assert second_out.exists()
    assert "no-op" in first_out.read_text()
    assert "X-Test" in second_out.read_text()


def test_cli_mixed_format_error(sample_hrw4u_files: tuple[Path, Path, Path], tmp_path: Path) -> None:
    """Combining a plain path with an ``input:output`` pair is rejected."""
    plain, paired, _ = sample_hrw4u_files
    paired_out = tmp_path / "out2.conf"

    proc = run_hrw4u([str(plain), f"{paired}:{paired_out}"])

    assert proc.returncode != 0
    assert "Mixed formats not allowed" in proc.stderr


def test_cli_nonexistent_input_file() -> None:
    """A missing input file in stdout mode fails with a "not found" message."""
    proc = run_hrw4u(["nonexistent_file.hrw4u"])

    assert proc.returncode != 0
    assert "not found" in proc.stderr


def test_cli_bulk_nonexistent_input_file(tmp_path: Path) -> None:
    """A missing input file in bulk mode fails with a "not found" message."""
    destination = tmp_path / "out.conf"

    proc = run_hrw4u([f"nonexistent_file.hrw4u:{destination}"])

    assert proc.returncode != 0
    assert "not found" in proc.stderr


def test_cli_ast_output(sample_hrw4u_files: tuple[Path, Path, Path]) -> None:
    """``--ast`` succeeds and prints something that looks like a parse tree."""
    source = sample_hrw4u_files[0]

    proc = run_hrw4u(["--ast", str(source)])

    assert proc.returncode == 0
    assert "program" in proc.stdout.lower() or "(" in proc.stdout


def test_cli_help_output() -> None:
    """``--help`` prints usage text that mentions the tool and bulk mode."""
    proc = run_hrw4u(["--help"])

    assert proc.returncode == 0
    help_text = proc.stdout.lower()
    assert "usage:" in help_text
    assert "hrw4u" in help_text
    assert "bulk" in help_text
# The two u4wrh CLI tests below share these source lines with run_bulk_test.
def test_u4wrh_single_file_to_stdout(tmp_path: Path) -> None:
    """Test u4wrh script with single file to stdout."""
    hrw_file = tmp_path / "test.conf"
    hrw_file.write_text('cond %{HEADER:X-Test} ="foo"\nset-header X-Response "bar"\n')

    script = Path("scripts/u4wrh").resolve()
    cmd = [sys.executable, str(script), str(hrw_file)]

    result = subprocess.run(cmd, capture_output=True, text=True, cwd=Path.cwd())

    assert result.returncode == 0
    assert "X-Test" in result.stdout
    assert "X-Response" in result.stdout


def test_u4wrh_bulk_mode(tmp_path: Path) -> None:
    """Test u4wrh bulk compilation mode."""
    hrw1 = tmp_path / "test1.conf"
    hrw1.write_text('cond %{HEADER:X-Test} ="foo"\nset-header X-Response "bar"\n')

    hrw2 = tmp_path / "test2.conf"
    hrw2.write_text('set-status 404\n')

    out1 = tmp_path / "out1.hrw4u"
    out2 = tmp_path / "out2.hrw4u"

    script = Path("scripts/u4wrh").resolve()
    cmd = [sys.executable, str(script), f"{hrw1}:{out1}", f"{hrw2}:{out2}"]

    result = subprocess.run(cmd, capture_output=True, text=True, cwd=Path.cwd())

    assert result.returncode == 0
    assert out1.exists()
    assert out2.exists()
    assert "X-Test" in out1.read_text()
    assert "404" in out2.read_text()


def run_bulk_test(group: str) -> None:
    """
    Run bulk compilation test for a specific test group.

    Collects all .input.txt files in the group, runs hrw4u once with bulk
    input:output pairs, and compares each output with expected .output.txt.

    Inputs whose name contains ``.fail.``, inputs without an expected output
    file, and inputs listed in the group's exceptions with a direction other
    than ``"hrw4u"`` are left out. The test is skipped when nothing remains.

    Args:
        group: Name of the directory under ``tests/data`` to compile.
    """
    base_dir = Path("tests/data") / group
    exceptions = _read_exceptions(base_dir)

    with tempfile.TemporaryDirectory() as tmpdir:
        tmp_path = Path(tmpdir)

        # One (input, expected output, actual output) triple per participating
        # case, so the command line and the comparison loop cannot drift apart.
        cases: list[tuple[Path, Path, Path]] = []

        for input_file in sorted(base_dir.glob("*.input.txt")):
            if ".fail." in input_file.name:
                continue

            base = input_file.with_suffix('')
            expected_output_file = base.with_suffix('.output.txt')
            test_id = base.name

            if test_id in exceptions and exceptions[test_id] != "hrw4u":
                continue

            if not expected_output_file.exists():
                continue

            actual_output_file = tmp_path / f"{input_file.stem}.output.txt"
            cases.append((input_file, expected_output_file, actual_output_file))

        if not cases:
            # pytest.skip raises, so nothing after it runs.
            pytest.skip(f"No valid test files found for bulk test in {group}")

        hrw4u_script = Path("scripts/hrw4u").resolve()
        cmd = [sys.executable, str(hrw4u_script)]
        cmd += [f"{source.resolve()}:{actual.resolve()}" for source, _, actual in cases]

        result = subprocess.run(cmd, capture_output=True, text=True, cwd=Path.cwd())

        if result.returncode != 0:
            pytest.fail(f"hrw4u bulk compilation failed:\nstdout: {result.stdout}\nstderr: {result.stderr}")

        for input_file, expected_output_file, actual_output_file in cases:
            if not actual_output_file.exists():
                pytest.fail(f"Output file not created for {input_file.name}: {actual_output_file}")

            # hrw4u writes its bulk outputs as UTF-8, so read them back the same
            # way instead of relying on the locale default.
            # NOTE(review): the expected files are assumed to be UTF-8 too -- confirm.
            actual_output = actual_output_file.read_text(encoding="utf-8").strip()
            expected_output = expected_output_file.read_text(encoding="utf-8").strip()

            assert actual_output == expected_output, (
                f"Bulk output mismatch for {input_file.name}\n"
                f"Expected:\n{expected_output}\n\n"
                f"Actual:\n{actual_output}")