Skip to content

Commit 11d1d76

Browse files
committed
fix: remove .br before empty lines to fix mandoc warnings
- Enhanced post-processing to remove .br macros that appear before empty lines
- This fixes 'WARNING: skipping paragraph macro: br before sp' from mandoc
- Also handles the common pattern of .br-empty-.br by removing both .br macros
- Added comprehensive test cases for the new patterns
- All mandoc 'br before sp' warnings are now resolved
1 parent f848e3a commit 11d1d76

4 files changed

Lines changed: 85 additions & 53 deletions

File tree

Cargo.lock

Lines changed: 6 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ uudoc = [
3535
"dep:clap_complete",
3636
"dep:clap_mangen",
3737
"dep:fluent-syntax",
38+
"dep:jiff",
3839
"dep:regex",
3940
"dep:zip",
4041
]
@@ -477,13 +478,14 @@ uu_checksum_common = { version = "0.7.0", path = "src/uu/checksum_common" }
477478
uutests = { version = "0.7.0", package = "uutests", path = "tests/uutests" }
478479

479480
[dependencies]
481+
clap.workspace = true
480482
clap_complete = { workspace = true, optional = true }
481483
clap_mangen = { workspace = true, optional = true }
482-
clap.workspace = true
483484
fluent-syntax = { workspace = true, optional = true }
484-
regex = { workspace = true, optional = true }
485485
itertools.workspace = true
486+
jiff = { workspace = true, optional = true }
486487
phf.workspace = true
488+
regex = { workspace = true, optional = true }
487489
selinux = { workspace = true, optional = true }
488490
textwrap.workspace = true
489491
uucore.workspace = true

src/bin/uudoc.rs

Lines changed: 73 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
// spell-checker:ignore mangen tldr mandoc uppercasing uppercased manpages DESTDIR
77

88
use std::{
9-
collections::{HashMap, HashSet},
9+
collections::HashMap,
1010
ffi::OsString,
1111
fs::File,
1212
io::{self, Read, Seek, Write},
@@ -17,8 +17,8 @@ use clap::{Arg, Command};
1717
use clap_complete::Shell;
1818
use clap_mangen::Man;
1919
use fluent_syntax::ast::{Entry, Message, Pattern};
20-
use jiff::Zoned;
2120
use fluent_syntax::parser;
21+
use jiff::Zoned;
2222
use regex::Regex;
2323
use textwrap::{fill, indent, termwidth};
2424
use zip::ZipArchive;
@@ -31,17 +31,16 @@ include!(concat!(env!("OUT_DIR"), "/uutils_map.rs"));
3131
/// Post-process a generated manpage to fix mandoc lint issues
3232
///
3333
/// This function:
34-
/// - Fixes the TH header by uppercasing command names and removing invalid date formats
34+
/// - Fixes the TH header by uppercasing command names and adding a proper date
3535
/// - Removes trailing whitespace from all lines
3636
/// - Fixes redundant .br paragraph macros that cause mandoc warnings
37-
fn post_process_manpage(manpage: String) -> String {
37+
/// - Removes .br before empty lines to avoid "br before sp" warnings
38+
/// - Removes .br after empty lines to avoid "br after sp" warnings
39+
/// - Fixes escape sequences (e.g., \\\\0 and \\0 become \e0) to avoid "undefined escape" warnings
40+
fn post_process_manpage(manpage: String, date: &str) -> String {
3841
// Only match TH headers that have at least a command name on the same line
3942
// Use [ \t] instead of \s to avoid matching newlines
4043
// Use a date format that satisfies mandoc (YYYY-MM-DD)
41-
let date = date.map_or_else(
42-
|| Zoned::now().strftime("%Y-%m-%d").to_string(),
43-
str::to_string,
44-
);
4544

4645
let th_regex = Regex::new(r"(?m)^\.TH[ \t]+([^ \t\n]+)(?:[ \t]+[^\n]*)?$").unwrap();
4746
let mut result = th_regex
@@ -52,34 +51,24 @@ fn post_process_manpage(manpage: String) -> String {
5251
.to_string();
5352

5453
// Process lines: remove trailing whitespace and fix .br issues in a single pass
55-
let lines: Vec<&str> = result.lines().collect();
56-
let mut fixed_lines = Vec::with_capacity(lines.len());
57-
let mut skip_indices = HashSet::new();
54+
let lines: Vec<&str> = result.lines().map(str::trim_end).collect();
55+
let mut fixed_lines: Vec<&str> = Vec::with_capacity(lines.len());
5856

59-
// First pass: identify lines to skip (redundant .br macros)
6057
for i in 0..lines.len() {
61-
let line = lines[i].trim_end();
58+
let line = lines[i];
6259

63-
if line == ".br" && !skip_indices.contains(&i) {
64-
// Check for consecutive .br macros
65-
if i > 0 && lines[i - 1].trim_end() == ".br" {
66-
skip_indices.insert(i);
67-
}
68-
// Check for .br, empty line, .br pattern
69-
else if i + 2 < lines.len()
70-
&& lines[i + 1].trim().is_empty()
71-
&& lines[i + 2].trim_end() == ".br"
72-
{
73-
skip_indices.insert(i + 2);
60+
if line == ".br" {
61+
let preceded_by_empty_line = i > 0 && lines[i - 1].is_empty();
62+
let followed_by_empty_line = i + 1 < lines.len() && lines[i + 1].is_empty();
63+
let followed_by_br = i + 1 < lines.len() && lines[i + 1] == ".br";
64+
65+
if preceded_by_empty_line || followed_by_empty_line || followed_by_br {
66+
// skip this ".br"
67+
continue;
7468
}
7569
}
76-
}
7770

78-
// Second pass: build the final output
79-
for (i, line) in lines.iter().enumerate() {
80-
if !skip_indices.contains(&i) {
81-
fixed_lines.push(line.trim_end());
82-
}
71+
fixed_lines.push(line);
8372
}
8473

8574
result = fixed_lines.join("\n");
@@ -174,7 +163,8 @@ fn gen_manpage<T: Args>(
174163
let manpage = String::from_utf8(buffer).expect("Invalid UTF-8 in manpage");
175164

176165
// Post-process the manpage to fix mandoc lint issues
177-
let processed_manpage = post_process_manpage(manpage, None);
166+
let date = Zoned::now().strftime("%Y-%m-%d").to_string();
167+
let processed_manpage = post_process_manpage(manpage, &date);
178168

179169
// Write the processed manpage to stdout
180170
io::stdout()
@@ -726,7 +716,7 @@ mod tests {
726716
".TH cat 1 \"cat (uutils coreutils) 0.7.0\"\n.SH NAME\ncat - concatenate files\n";
727717
let expected = ".TH CAT 1 \"2024-01-01\"\n.SH NAME\ncat - concatenate files\n";
728718

729-
let result = post_process_manpage(input.to_string(), Some("2024-01-01"));
719+
let result = post_process_manpage(input.to_string(), "2024-01-01");
730720
assert_eq!(result, expected);
731721
}
732722

@@ -736,7 +726,7 @@ mod tests {
736726
let input = ".TH TEST 1 \nSome text with trailing spaces \n.SH SECTION \n";
737727
let expected = ".TH TEST 1 \"2024-01-01\"\nSome text with trailing spaces\n.SH SECTION\n";
738728

739-
let result = post_process_manpage(input.to_string(), Some("2024-01-01"));
729+
let result = post_process_manpage(input.to_string(), "2024-01-01");
740730
assert_eq!(result, expected);
741731
}
742732

@@ -746,7 +736,7 @@ mod tests {
746736
let input = ".TH TEST 1\n.br\n.br\nSome text\n";
747737
let expected = ".TH TEST 1 \"2024-01-01\"\n.br\nSome text\n";
748738

749-
let result = post_process_manpage(input.to_string(), Some("2024-01-01"));
739+
let result = post_process_manpage(input.to_string(), "2024-01-01");
750740
assert_eq!(result, expected);
751741
}
752742

@@ -757,7 +747,7 @@ mod tests {
757747
let input = ".TH TEST 1\n.br\n\n.br\nSome text\n";
758748
let expected = ".TH TEST 1 \"2024-01-01\"\n\nSome text\n";
759749

760-
let result = post_process_manpage(input.to_string(), Some("2024-01-01"));
750+
let result = post_process_manpage(input.to_string(), "2024-01-01");
761751
assert_eq!(result, expected);
762752
}
763753

@@ -767,7 +757,7 @@ mod tests {
767757
let input = ".TH TEST 1\nLine 1\n.br\nLine 2\n";
768758
let expected = ".TH TEST 1 \"2024-01-01\"\nLine 1\n.br\nLine 2\n";
769759

770-
let result = post_process_manpage(input.to_string(), Some("2024-01-01"));
760+
let result = post_process_manpage(input.to_string(), "2024-01-01");
771761
assert_eq!(result, expected);
772762
}
773763

@@ -777,7 +767,7 @@ mod tests {
777767
let input = ".TH CaT 1 \"some version info\"\nContent\n";
778768
let expected = ".TH CAT 1 \"2024-01-01\"\nContent\n";
779769

780-
let result = post_process_manpage(input.to_string(), Some("2024-01-01"));
770+
let result = post_process_manpage(input.to_string(), "2024-01-01");
781771
assert_eq!(result, expected);
782772
}
783773

@@ -787,7 +777,7 @@ mod tests {
787777
let input = ".SH NAME\ntest - a test utility\n";
788778
let expected = ".SH NAME\ntest - a test utility\n";
789779

790-
let result = post_process_manpage(input.to_string(), Some("2024-01-01"));
780+
let result = post_process_manpage(input.to_string(), "2024-01-01");
791781
assert_eq!(result, expected);
792782
}
793783

@@ -799,7 +789,7 @@ mod tests {
799789
// .br followed/preceded by empty lines should be removed, consecutive .br should have one removed
800790
let expected = ".TH TEST 1 \"2024-01-01\"\nSection 1\n\nMiddle\n.br\nSection 2\n\nEnd\n";
801791

802-
let result = post_process_manpage(input.to_string(), Some("2024-01-01"));
792+
let result = post_process_manpage(input.to_string(), "2024-01-01");
803793
assert_eq!(result, expected);
804794
}
805795

@@ -808,25 +798,65 @@ mod tests {
808798
// Test that malformed TH headers don't cause panics and are handled gracefully
809799
let input1 = ".TH\nContent\n"; // Missing command name
810800
let expected1 = ".TH\nContent\n";
811-
let result1 = post_process_manpage(input1.to_string(), Some("2024-01-01"));
801+
let result1 = post_process_manpage(input1.to_string(), "2024-01-01");
812802
assert_eq!(result1, expected1);
813803

814804
// TH header with special characters
815805
let input2 = ".TH test-cmd 1 \"version 1.0\"\nContent\n";
816806
let expected2 = ".TH TEST-CMD 1 \"2024-01-01\"\nContent\n";
817-
let result2 = post_process_manpage(input2.to_string(), Some("2024-01-01"));
807+
let result2 = post_process_manpage(input2.to_string(), "2024-01-01");
818808
assert_eq!(result2, expected2);
819809

820810
// TH header at end of file without newline
821811
let input3 = "Content\n.TH test 1";
822812
let expected3 = "Content\n.TH TEST 1 \"2024-01-01\"\n";
823-
let result3 = post_process_manpage(input3.to_string(), Some("2024-01-01"));
813+
let result3 = post_process_manpage(input3.to_string(), "2024-01-01");
824814
assert_eq!(result3, expected3);
825815

826816
// Multiple TH headers (every header is rewritten: with (?m), ^ matches at the start of each line)
827817
let input4 = ".TH first 1\nMiddle\n.TH second 1\n";
828818
let expected4 = ".TH FIRST 1 \"2024-01-01\"\nMiddle\n.TH SECOND 1 \"2024-01-01\"\n";
829-
let result4 = post_process_manpage(input4.to_string(), Some("2024-01-01"));
819+
let result4 = post_process_manpage(input4.to_string(), "2024-01-01");
830820
assert_eq!(result4, expected4);
831821
}
822+
823+
#[test]
824+
fn test_post_process_manpage_removes_br_before_empty_line() {
825+
// Test that .br is removed when followed by empty line (which becomes .sp)
826+
let input = ".TH TEST 1\nSome text\n.br\n\nMore text\n";
827+
let expected = ".TH TEST 1 \"2024-01-01\"\nSome text\n\nMore text\n";
828+
829+
let result = post_process_manpage(input.to_string(), "2024-01-01");
830+
assert_eq!(result, expected);
831+
}
832+
833+
#[test]
834+
fn test_post_process_manpage_complex_br_before_empty() {
835+
// Test multiple .br before empty line patterns
836+
let input = ".TH TEST 1\nSection 1\n.br\n\nSection 2\n.br\n\nSection 3\n";
837+
let expected = ".TH TEST 1 \"2024-01-01\"\nSection 1\n\nSection 2\n\nSection 3\n";
838+
839+
let result = post_process_manpage(input.to_string(), "2024-01-01");
840+
assert_eq!(result, expected);
841+
}
842+
843+
#[test]
844+
fn test_post_process_manpage_removes_br_after_empty_line() {
845+
// Test that .br is removed when preceded by empty line (which becomes .sp)
846+
let input = ".TH TEST 1\nSome text\n\n.br\nMore text\n";
847+
let expected = ".TH TEST 1 \"2024-01-01\"\nSome text\n\nMore text\n";
848+
849+
let result = post_process_manpage(input.to_string(), "2024-01-01");
850+
assert_eq!(result, expected);
851+
}
852+
853+
#[test]
854+
fn test_post_process_manpage_fixes_escape_sequences() {
855+
// Test that \\\\0 and \\0 are fixed to \e0 (literal backslash-zero)
856+
let input = ".TH TEST 1\nText with \\\\\\\\0 and \\\\0 escape\n";
857+
let expected = ".TH TEST 1 \"2024-01-01\"\nText with \\e0 and \\e0 escape\n";
858+
859+
let result = post_process_manpage(input.to_string(), "2024-01-01");
860+
assert_eq!(result, expected);
861+
}
832862
}

tests/uudoc/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ fn test_manpage_generation() {
3636
);
3737

3838
let output_str = String::from_utf8_lossy(&output.stdout);
39-
assert!(output_str.contains("\n.TH ls"), "{output_str}");
39+
assert!(output_str.contains("\n.TH LS"), "{output_str}");
4040
assert!(output_str.contains('1'), "{output_str}");
4141
assert!(output_str.contains("\n.SH NAME\nls"), "{output_str}");
4242
}
@@ -62,7 +62,7 @@ fn test_manpage_coreutils() {
6262
);
6363

6464
let output_str = String::from_utf8_lossy(&output.stdout);
65-
assert!(output_str.contains("\n.TH coreutils"), "{output_str}");
65+
assert!(output_str.contains("\n.TH COREUTILS"), "{output_str}");
6666
assert!(output_str.contains("coreutils"), "{output_str}");
6767
assert!(output_str.contains("\n.SH NAME\ncoreutils"), "{output_str}");
6868
}

0 commit comments

Comments
 (0)