66// spell-checker:ignore mangen tldr mandoc uppercasing uppercased manpages DESTDIR
77
88use std:: {
9- collections:: { HashMap , HashSet } ,
9+ collections:: HashMap ,
1010 ffi:: OsString ,
1111 fs:: File ,
1212 io:: { self , Read , Seek , Write } ,
@@ -17,8 +17,8 @@ use clap::{Arg, Command};
1717use clap_complete:: Shell ;
1818use clap_mangen:: Man ;
1919use fluent_syntax:: ast:: { Entry , Message , Pattern } ;
20- use jiff:: Zoned ;
2120use fluent_syntax:: parser;
21+ use jiff:: Zoned ;
2222use regex:: Regex ;
2323use textwrap:: { fill, indent, termwidth} ;
2424use zip:: ZipArchive ;
@@ -31,17 +31,16 @@ include!(concat!(env!("OUT_DIR"), "/uutils_map.rs"));
3131/// Post-process a generated manpage to fix mandoc lint issues
3232///
3333/// This function:
34- /// - Fixes the TH header by uppercasing command names and removing invalid date formats
34+ /// - Fixes the TH header by uppercasing command names and adding a proper date
3535/// - Removes trailing whitespace from all lines
3636/// - Fixes redundant .br paragraph macros that cause mandoc warnings
37- fn post_process_manpage ( manpage : String ) -> String {
37+ /// - Removes .br before empty lines to avoid "br before sp" warnings
38+ /// - Removes .br after empty lines to avoid "br after sp" warnings
39+ /// - Fixes escape sequences (e.g., \\\\0 and \\0 become \e0) to avoid "undefined escape" warnings
40+ fn post_process_manpage ( manpage : String , date : & str ) -> String {
3841 // Only match TH headers that have at least a command name on the same line
3942 // Use [ \t] instead of \s to avoid matching newlines
4043 // Use a date format that satisfies mandoc (YYYY-MM-DD)
41- let date = date. map_or_else (
42- || Zoned :: now ( ) . strftime ( "%Y-%m-%d" ) . to_string ( ) ,
43- str:: to_string,
44- ) ;
4544
4645 let th_regex = Regex :: new ( r"(?m)^\.TH[ \t]+([^ \t\n]+)(?:[ \t]+[^\n]*)?$" ) . unwrap ( ) ;
4746 let mut result = th_regex
@@ -52,34 +51,24 @@ fn post_process_manpage(manpage: String) -> String {
5251 . to_string ( ) ;
5352
5453 // Process lines: remove trailing whitespace and fix .br issues in a single pass
55- let lines: Vec < & str > = result. lines ( ) . collect ( ) ;
56- let mut fixed_lines = Vec :: with_capacity ( lines. len ( ) ) ;
57- let mut skip_indices = HashSet :: new ( ) ;
54+ let lines: Vec < & str > = result. lines ( ) . map ( str:: trim_end) . collect ( ) ;
55+ let mut fixed_lines: Vec < & str > = Vec :: with_capacity ( lines. len ( ) ) ;
5856
59- // First pass: identify lines to skip (redundant .br macros)
6057 for i in 0 ..lines. len ( ) {
61- let line = lines[ i] . trim_end ( ) ;
58+ let line = lines[ i] ;
6259
63- if line == ".br" && !skip_indices. contains ( & i) {
64- // Check for consecutive .br macros
65- if i > 0 && lines[ i - 1 ] . trim_end ( ) == ".br" {
66- skip_indices. insert ( i) ;
67- }
68- // Check for .br, empty line, .br pattern
69- else if i + 2 < lines. len ( )
70- && lines[ i + 1 ] . trim ( ) . is_empty ( )
71- && lines[ i + 2 ] . trim_end ( ) == ".br"
72- {
73- skip_indices. insert ( i + 2 ) ;
60+ if line == ".br" {
61+ let preceded_by_empty_line = i > 0 && lines[ i - 1 ] . is_empty ( ) ;
62+ let followed_by_empty_line = i + 1 < lines. len ( ) && lines[ i + 1 ] . is_empty ( ) ;
63+ let followed_by_br = i + 1 < lines. len ( ) && lines[ i + 1 ] == ".br" ;
64+
65+ if preceded_by_empty_line || followed_by_empty_line || followed_by_br {
66+ // skip this ".br"
67+ continue ;
7468 }
7569 }
76- }
7770
78- // Second pass: build the final output
79- for ( i, line) in lines. iter ( ) . enumerate ( ) {
80- if !skip_indices. contains ( & i) {
81- fixed_lines. push ( line. trim_end ( ) ) ;
82- }
71+ fixed_lines. push ( line) ;
8372 }
8473
8574 result = fixed_lines. join ( "\n " ) ;
@@ -174,7 +163,8 @@ fn gen_manpage<T: Args>(
174163 let manpage = String :: from_utf8 ( buffer) . expect ( "Invalid UTF-8 in manpage" ) ;
175164
176165 // Post-process the manpage to fix mandoc lint issues
177- let processed_manpage = post_process_manpage ( manpage, None ) ;
166+ let date = Zoned :: now ( ) . strftime ( "%Y-%m-%d" ) . to_string ( ) ;
167+ let processed_manpage = post_process_manpage ( manpage, & date) ;
178168
179169 // Write the processed manpage to stdout
180170 io:: stdout ( )
@@ -726,7 +716,7 @@ mod tests {
726716 ".TH cat 1 \" cat (uutils coreutils) 0.7.0\" \n .SH NAME\n cat - concatenate files\n " ;
727717 let expected = ".TH CAT 1 \" 2024-01-01\" \n .SH NAME\n cat - concatenate files\n " ;
728718
729- let result = post_process_manpage ( input. to_string ( ) , Some ( "2024-01-01" ) ) ;
719+ let result = post_process_manpage ( input. to_string ( ) , "2024-01-01" ) ;
730720 assert_eq ! ( result, expected) ;
731721 }
732722
@@ -736,7 +726,7 @@ mod tests {
736726 let input = ".TH TEST 1 \n Some text with trailing spaces \n .SH SECTION \n " ;
737727 let expected = ".TH TEST 1 \" 2024-01-01\" \n Some text with trailing spaces\n .SH SECTION\n " ;
738728
739- let result = post_process_manpage ( input. to_string ( ) , Some ( "2024-01-01" ) ) ;
729+ let result = post_process_manpage ( input. to_string ( ) , "2024-01-01" ) ;
740730 assert_eq ! ( result, expected) ;
741731 }
742732
@@ -746,7 +736,7 @@ mod tests {
746736 let input = ".TH TEST 1\n .br\n .br\n Some text\n " ;
747737 let expected = ".TH TEST 1 \" 2024-01-01\" \n .br\n Some text\n " ;
748738
749- let result = post_process_manpage ( input. to_string ( ) , Some ( "2024-01-01" ) ) ;
739+ let result = post_process_manpage ( input. to_string ( ) , "2024-01-01" ) ;
750740 assert_eq ! ( result, expected) ;
751741 }
752742
@@ -757,7 +747,7 @@ mod tests {
757747 let input = ".TH TEST 1\n .br\n \n .br\n Some text\n " ;
758748 let expected = ".TH TEST 1 \" 2024-01-01\" \n \n Some text\n " ;
759749
760- let result = post_process_manpage ( input. to_string ( ) , Some ( "2024-01-01" ) ) ;
750+ let result = post_process_manpage ( input. to_string ( ) , "2024-01-01" ) ;
761751 assert_eq ! ( result, expected) ;
762752 }
763753
@@ -767,7 +757,7 @@ mod tests {
767757 let input = ".TH TEST 1\n Line 1\n .br\n Line 2\n " ;
768758 let expected = ".TH TEST 1 \" 2024-01-01\" \n Line 1\n .br\n Line 2\n " ;
769759
770- let result = post_process_manpage ( input. to_string ( ) , Some ( "2024-01-01" ) ) ;
760+ let result = post_process_manpage ( input. to_string ( ) , "2024-01-01" ) ;
771761 assert_eq ! ( result, expected) ;
772762 }
773763
@@ -777,7 +767,7 @@ mod tests {
777767 let input = ".TH CaT 1 \" some version info\" \n Content\n " ;
778768 let expected = ".TH CAT 1 \" 2024-01-01\" \n Content\n " ;
779769
780- let result = post_process_manpage ( input. to_string ( ) , Some ( "2024-01-01" ) ) ;
770+ let result = post_process_manpage ( input. to_string ( ) , "2024-01-01" ) ;
781771 assert_eq ! ( result, expected) ;
782772 }
783773
@@ -787,7 +777,7 @@ mod tests {
787777 let input = ".SH NAME\n test - a test utility\n " ;
788778 let expected = ".SH NAME\n test - a test utility\n " ;
789779
790- let result = post_process_manpage ( input. to_string ( ) , Some ( "2024-01-01" ) ) ;
780+ let result = post_process_manpage ( input. to_string ( ) , "2024-01-01" ) ;
791781 assert_eq ! ( result, expected) ;
792782 }
793783
@@ -799,7 +789,7 @@ mod tests {
799789 // .br followed/preceded by empty lines should be removed, consecutive .br should have one removed
800790 let expected = ".TH TEST 1 \" 2024-01-01\" \n Section 1\n \n Middle\n .br\n Section 2\n \n End\n " ;
801791
802- let result = post_process_manpage ( input. to_string ( ) , Some ( "2024-01-01" ) ) ;
792+ let result = post_process_manpage ( input. to_string ( ) , "2024-01-01" ) ;
803793 assert_eq ! ( result, expected) ;
804794 }
805795
@@ -808,25 +798,65 @@ mod tests {
808798 // Test that malformed TH headers don't cause panics and are handled gracefully
809799 let input1 = ".TH\n Content\n " ; // Missing command name
810800 let expected1 = ".TH\n Content\n " ;
811- let result1 = post_process_manpage ( input1. to_string ( ) , Some ( "2024-01-01" ) ) ;
801+ let result1 = post_process_manpage ( input1. to_string ( ) , "2024-01-01" ) ;
812802 assert_eq ! ( result1, expected1) ;
813803
814804 // TH header with special characters
815805 let input2 = ".TH test-cmd 1 \" version 1.0\" \n Content\n " ;
816806 let expected2 = ".TH TEST-CMD 1 \" 2024-01-01\" \n Content\n " ;
817- let result2 = post_process_manpage ( input2. to_string ( ) , Some ( "2024-01-01" ) ) ;
807+ let result2 = post_process_manpage ( input2. to_string ( ) , "2024-01-01" ) ;
818808 assert_eq ! ( result2, expected2) ;
819809
820810 // TH header at end of file without newline
821811 let input3 = "Content\n .TH test 1" ;
822812 let expected3 = "Content\n .TH TEST 1 \" 2024-01-01\" \n " ;
823- let result3 = post_process_manpage ( input3. to_string ( ) , Some ( "2024-01-01" ) ) ;
813+ let result3 = post_process_manpage ( input3. to_string ( ) , "2024-01-01" ) ;
824814 assert_eq ! ( result3, expected3) ;
825815
826816 // Multiple TH headers: every one is rewritten, because (?m) lets ^ match at the start of each line
827817 let input4 = ".TH first 1\n Middle\n .TH second 1\n " ;
828818 let expected4 = ".TH FIRST 1 \" 2024-01-01\" \n Middle\n .TH SECOND 1 \" 2024-01-01\" \n " ;
829- let result4 = post_process_manpage ( input4. to_string ( ) , Some ( "2024-01-01" ) ) ;
819+ let result4 = post_process_manpage ( input4. to_string ( ) , "2024-01-01" ) ;
830820 assert_eq ! ( result4, expected4) ;
831821 }
822+
823+ #[ test]
824+ fn test_post_process_manpage_removes_br_before_empty_line ( ) {
825+ // A .br immediately followed by an empty line must be dropped while the empty line is kept (the empty line becomes .sp, so the .br would trigger "br before sp")
826+ let input = ".TH TEST 1\n Some text\n .br\n \n More text\n " ;
827+ let expected = ".TH TEST 1 \" 2024-01-01\" \n Some text\n \n More text\n " ;
828+
829+ let result = post_process_manpage ( input. to_string ( ) , "2024-01-01" ) ;
830+ assert_eq ! ( result, expected) ;
831+ }
832+
833+ #[ test]
834+ fn test_post_process_manpage_complex_br_before_empty ( ) {
835+ // Several ".br followed by an empty line" pairs in one page: every such .br must be dropped (not only the first) and every empty line kept
836+ let input = ".TH TEST 1\n Section 1\n .br\n \n Section 2\n .br\n \n Section 3\n " ;
837+ let expected = ".TH TEST 1 \" 2024-01-01\" \n Section 1\n \n Section 2\n \n Section 3\n " ;
838+
839+ let result = post_process_manpage ( input. to_string ( ) , "2024-01-01" ) ;
840+ assert_eq ! ( result, expected) ;
841+ }
842+
843+ #[ test]
844+ fn test_post_process_manpage_removes_br_after_empty_line ( ) {
845+ // A .br immediately preceded by an empty line must be dropped while the empty line is kept (the empty line becomes .sp, so the .br would trigger "br after sp")
846+ let input = ".TH TEST 1\n Some text\n \n .br\n More text\n " ;
847+ let expected = ".TH TEST 1 \" 2024-01-01\" \n Some text\n \n More text\n " ;
848+
849+ let result = post_process_manpage ( input. to_string ( ) , "2024-01-01" ) ;
850+ assert_eq ! ( result, expected) ;
851+ }
852+
853+ #[ test]
854+ fn test_post_process_manpage_fixes_escape_sequences ( ) {
855+ // Test that both \\\\0 and \\0 in the page text are rewritten to \e0 (a literal backslash followed by 0)
856+ let input = ".TH TEST 1\n Text with \\ \\ \\ \\ 0 and \\ \\ 0 escape\n " ;
857+ let expected = ".TH TEST 1 \" 2024-01-01\" \n Text with \\ e0 and \\ e0 escape\n " ;
858+
859+ let result = post_process_manpage ( input. to_string ( ) , "2024-01-01" ) ;
860+ assert_eq ! ( result, expected) ;
861+ }
832862}
0 commit comments