1+ import csv
2+ import os
3+ import re
14from django .test import TestCase
25from pokemon_v2 .models import *
36
@@ -9,3 +12,235 @@ def setUp(self):
def fields_are_valid(self):
    """Check that the "Smell" ability is stored with generation id 3.

    NOTE(review): the method name lacks the ``test`` prefix, so a default
    unittest loader would not collect it if the enclosing class is a
    TestCase -- confirm whether that is intentional.
    """
    ability = Ability.objects.get(name="Smell")
    expected_generation = 3
    self.assertEqual(ability.generation_id, expected_generation)
15+
16+
class CSVResourceNameValidationTestCase(TestCase):
    """
    Test that all resource identifiers in CSV files follow ASCII slug format.

    Resource identifiers are used in API URLs and should be URL-safe ASCII
    slugs (lowercase letters, numbers, and hyphens only).

    This test validates the data source (CSV files) before it's loaded into
    the database.
    """

    # Valid identifiers: lowercase ASCII letters, digits and hyphens only.
    # Always apply this with ``fullmatch``: ``$`` combined with ``match``
    # would also accept a value that ends in a newline.
    VALID_IDENTIFIER_PATTERN = re.compile(r"^[a-z0-9-]+$")

    # CSV files that contain an identifier column to validate.
    # Format: (filename, identifier_column_name)
    CSV_FILES_TO_VALIDATE = [
        ("abilities.csv", "identifier"),
        ("berry_firmness.csv", "identifier"),
        ("conquest_episodes.csv", "identifier"),
        ("conquest_kingdoms.csv", "identifier"),
        ("conquest_move_displacements.csv", "identifier"),
        ("conquest_move_ranges.csv", "identifier"),
        ("conquest_stats.csv", "identifier"),
        ("conquest_warrior_archetypes.csv", "identifier"),
        ("conquest_warrior_skills.csv", "identifier"),
        ("conquest_warrior_stats.csv", "identifier"),
        ("conquest_warriors.csv", "identifier"),
        ("contest_types.csv", "identifier"),
        ("egg_groups.csv", "identifier"),
        ("encounter_conditions.csv", "identifier"),
        ("encounter_condition_values.csv", "identifier"),
        ("encounter_methods.csv", "identifier"),
        ("evolution_triggers.csv", "identifier"),
        ("genders.csv", "identifier"),
        ("generations.csv", "identifier"),
        ("growth_rates.csv", "identifier"),
        ("items.csv", "identifier"),
        ("item_categories.csv", "identifier"),
        ("item_flags.csv", "identifier"),
        ("item_fling_effects.csv", "identifier"),
        ("item_pockets.csv", "identifier"),
        ("languages.csv", "identifier"),
        ("locations.csv", "identifier"),
        ("location_areas.csv", "identifier"),
        ("moves.csv", "identifier"),
        ("move_battle_styles.csv", "identifier"),
        ("move_damage_classes.csv", "identifier"),
        ("move_flags.csv", "identifier"),
        ("move_meta_ailments.csv", "identifier"),
        ("move_meta_categories.csv", "identifier"),
        ("move_targets.csv", "identifier"),
        ("natures.csv", "identifier"),
        ("pal_park_areas.csv", "identifier"),
        ("pokeathlon_stats.csv", "identifier"),
        ("pokedexes.csv", "identifier"),
        ("pokemon.csv", "identifier"),
        ("pokemon_colors.csv", "identifier"),
        ("pokemon_forms.csv", "identifier"),
        ("pokemon_habitats.csv", "identifier"),
        ("pokemon_move_methods.csv", "identifier"),
        ("pokemon_shapes.csv", "identifier"),
        ("pokemon_species.csv", "identifier"),
        ("regions.csv", "identifier"),
        ("stats.csv", "identifier"),
        ("types.csv", "identifier"),
        ("versions.csv", "identifier"),
        ("version_groups.csv", "identifier"),
    ]

    def get_csv_path(self, filename):
        """Get the absolute path to a CSV file in data/v2/csv/"""
        from django.conf import settings

        return os.path.join(settings.BASE_DIR, "data", "v2", "csv", filename)

    def _scan_csv(self, filename, csv_path, identifier_column):
        """
        Scan one CSV file and return ``(violations, file_error)``.

        ``violations`` is a list of dicts with the keys ``file``, ``row``,
        ``id`` and ``identifier``, one per non-empty identifier that is not a
        valid slug. ``file_error`` is a message string when the file could not
        be (fully) validated -- unreadable, malformed, or missing the
        identifier column -- and ``None`` otherwise. Violations collected
        before a read error are still returned.
        """
        violations = []
        try:
            # newline="" lets the csv module handle line endings itself, as
            # its documentation requires for file objects.
            with open(csv_path, "r", encoding="utf-8", newline="") as csvfile:
                reader = csv.DictReader(csvfile)

                # ``fieldnames`` is None for an empty file, so guard the
                # membership test instead of letting it raise TypeError.
                if identifier_column not in (reader.fieldnames or []):
                    return (
                        violations,
                        f"{filename}: column '{identifier_column}' not found",
                    )

                for row in reader:
                    # A row shorter than the header yields None for the
                    # missing columns; treat that like an empty identifier.
                    identifier = (row.get(identifier_column) or "").strip()

                    # Skip empty identifiers
                    if not identifier:
                        continue

                    if not self.VALID_IDENTIFIER_PATTERN.fullmatch(identifier):
                        violations.append(
                            {
                                "file": filename,
                                # line_num stays correct when the reader
                                # skips blank lines or a record spans
                                # several physical lines.
                                "row": reader.line_num,
                                "id": row.get("id", "N/A"),
                                "identifier": identifier,
                            }
                        )
        except (OSError, UnicodeDecodeError, csv.Error) as exc:
            return violations, f"{filename}: error reading file: {exc}"

        return violations, None

    def _build_failure_message(self, missing_files, file_errors, violations):
        """Format the collected problems into one multi-line failure message."""
        error_lines = []

        # Report missing files first
        if missing_files:
            error_lines.append("\n\nMissing CSV files:")
            error_lines.extend(f"  - {filename}" for filename in missing_files)
            error_lines.append(
                "\nAll CSV files listed in CSV_FILES_TO_VALIDATE must exist."
            )

        # Files that exist but could not be validated
        if file_errors:
            error_lines.append("\n\nCSV files that could not be validated:")
            error_lines.extend(f"  - {message}" for message in file_errors)

        # Report violations
        if violations:
            error_lines.append(
                "\n\nFound {} resource(s) with invalid identifiers "
                "(not ASCII slugs):".format(len(violations))
            )
            error_lines.append(
                "\nIdentifiers must match pattern: {}".format(
                    self.VALID_IDENTIFIER_PATTERN.pattern
                )
            )
            error_lines.append("\nInvalid identifiers found in CSV files:")
            error_lines.extend(
                "  - {file} (row {row}, id={id}): {identifier}".format(**v)
                for v in violations
            )
            error_lines.append(
                "\nThese identifiers contain invalid characters and must be normalized."
            )
            error_lines.append(
                "Update the CSV files in data/v2/csv/ to fix these identifiers."
            )
            error_lines.append("\nSuggested fixes:")
            # The apostrophe is written as an escape so it cannot be silently
            # normalised to an ASCII apostrophe by an editor or tool.
            error_lines.append(
                "  - Remove Unicode apostrophes (\u2019) and replace with "
                "regular hyphens or remove"
            )
            error_lines.append("  - Remove Unicode letters (ñ → n)")
            error_lines.append(
                "  - Remove parentheses and other special characters"
            )
            error_lines.append("  - Convert to lowercase")

        return "\n".join(error_lines)

    def test_all_csv_identifiers_are_ascii_slugs(self):
        """
        Validate that all resource identifiers in CSV files follow the ASCII
        slug format.

        Identifiers should only contain:
        - Lowercase letters (a-z)
        - Numbers (0-9)
        - Hyphens (-)

        This test will fail if any CSV contains identifiers with:
        - Non-ASCII characters (ñ, U+2019 apostrophe, é, etc.)
        - Uppercase letters
        - Spaces
        - Special characters (&, (), ', etc.)

        It also fails when a listed file is missing, unreadable, or lacks its
        identifier column. Empty identifiers are skipped.
        """
        violations = []
        file_errors = []
        missing_files = []

        for filename, identifier_column in self.CSV_FILES_TO_VALIDATE:
            csv_path = self.get_csv_path(filename)

            # Track missing files to report at the end
            if not os.path.exists(csv_path):
                missing_files.append(filename)
                continue

            found, file_error = self._scan_csv(
                filename, csv_path, identifier_column
            )
            violations.extend(found)
            if file_error is not None:
                file_errors.append(file_error)

        if violations or file_errors or missing_files:
            self.fail(
                self._build_failure_message(
                    missing_files, file_errors, violations
                )
            )

    def test_identifier_pattern_examples(self):
        """Test that the validation pattern works correctly with example identifiers."""
        valid_identifiers = [
            "pikachu",
            "charizard-mega-x",
            "mr-mime",
            "ho-oh",
            "type-null",
            "item-123",
            "mega-stone",
        ]

        for identifier in valid_identifiers:
            self.assertIsNotNone(
                self.VALID_IDENTIFIER_PATTERN.fullmatch(identifier),
                f"{identifier!r} should be valid but was rejected",
            )

        invalid_identifiers = [
            "Pikachu",  # Uppercase
            "Mr. Mime",  # Space and period
            "kofu\u2019s-wallet",  # Unicode apostrophe (U+2019), kept as an escape
            "jalapeño",  # Unicode ñ
            "steel-bottle-(r)",  # Parentheses
            "b&w-grass-tablecloth",  # Ampersand
            "farfetch'd",  # ASCII apostrophe
            "kofu's-wallet",  # ASCII apostrophe
            "pikachu\n",  # Trailing newline ("$" with match() would accept it)
            "",  # Empty string
        ]

        for identifier in invalid_identifiers:
            self.assertIsNone(
                self.VALID_IDENTIFIER_PATTERN.fullmatch(identifier),
                f"{identifier!r} should be invalid but was accepted",
            )
0 commit comments