Skip to content

Commit 61568ec

Browse files
committed
Add CSV identifier validation with comprehensive error reporting
- Validates 50 CSV files with identifier columns
- Reports missing files and invalid identifiers together
- Found 16 invalid identifiers across items, locations, and move_meta_categories
1 parent 5e1c604 commit 61568ec

File tree

1 file changed

+235
-0
lines changed

1 file changed

+235
-0
lines changed

pokemon_v2/test_models.py

Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
import csv
2+
import os
3+
import re
14
from django.test import TestCase
25
from pokemon_v2.models import *
36

@@ -9,3 +12,235 @@ def setUp(self):
912
def fields_are_valid(self):
    """Look up the Ability named "Smell" and assert its generation_id is 3.

    NOTE(review): the method name lacks the ``test_`` prefix, so default
    unittest discovery would not pick it up on its own -- confirm whether
    that is intentional.
    """
    ability = Ability.objects.get(name="Smell")
    self.assertEqual(ability.generation_id, 3)
15+
16+
17+
class CSVResourceNameValidationTestCase(TestCase):
    """
    Check that resource identifiers in the CSV data files are ASCII slugs.

    Identifiers end up in API URLs, so they must be URL-safe: lowercase ASCII
    letters, digits and hyphens only. The check runs against the CSV source
    files directly rather than against rows loaded into the database.
    """

    # Valid identifiers: lowercase ASCII letters, digits and hyphens only.
    # Always apply it with ``fullmatch``: with ``match``, ``$`` would also
    # accept a single trailing newline.
    VALID_IDENTIFIER_PATTERN = re.compile(r"^[a-z0-9-]+$")

    # CSV files that contain an identifier column to validate.
    # Format: (filename, identifier_column_name)
    CSV_FILES_TO_VALIDATE = [
        ("abilities.csv", "identifier"),
        ("berry_firmness.csv", "identifier"),
        ("conquest_episodes.csv", "identifier"),
        ("conquest_kingdoms.csv", "identifier"),
        ("conquest_move_displacements.csv", "identifier"),
        ("conquest_move_ranges.csv", "identifier"),
        ("conquest_stats.csv", "identifier"),
        ("conquest_warrior_archetypes.csv", "identifier"),
        ("conquest_warrior_skills.csv", "identifier"),
        ("conquest_warrior_stats.csv", "identifier"),
        ("conquest_warriors.csv", "identifier"),
        ("contest_types.csv", "identifier"),
        ("egg_groups.csv", "identifier"),
        ("encounter_conditions.csv", "identifier"),
        ("encounter_condition_values.csv", "identifier"),
        ("encounter_methods.csv", "identifier"),
        ("evolution_triggers.csv", "identifier"),
        ("genders.csv", "identifier"),
        ("generations.csv", "identifier"),
        ("growth_rates.csv", "identifier"),
        ("items.csv", "identifier"),
        ("item_categories.csv", "identifier"),
        ("item_flags.csv", "identifier"),
        ("item_fling_effects.csv", "identifier"),
        ("item_pockets.csv", "identifier"),
        ("languages.csv", "identifier"),
        ("locations.csv", "identifier"),
        ("location_areas.csv", "identifier"),
        ("moves.csv", "identifier"),
        ("move_battle_styles.csv", "identifier"),
        ("move_damage_classes.csv", "identifier"),
        ("move_flags.csv", "identifier"),
        ("move_meta_ailments.csv", "identifier"),
        ("move_meta_categories.csv", "identifier"),
        ("move_targets.csv", "identifier"),
        ("natures.csv", "identifier"),
        ("pal_park_areas.csv", "identifier"),
        ("pokeathlon_stats.csv", "identifier"),
        ("pokedexes.csv", "identifier"),
        ("pokemon.csv", "identifier"),
        ("pokemon_colors.csv", "identifier"),
        ("pokemon_forms.csv", "identifier"),
        ("pokemon_habitats.csv", "identifier"),
        ("pokemon_move_methods.csv", "identifier"),
        ("pokemon_shapes.csv", "identifier"),
        ("pokemon_species.csv", "identifier"),
        ("regions.csv", "identifier"),
        ("stats.csv", "identifier"),
        ("types.csv", "identifier"),
        ("versions.csv", "identifier"),
        ("version_groups.csv", "identifier"),
    ]

    def get_csv_path(self, filename):
        """Return the path of ``filename`` under ``<BASE_DIR>/data/v2/csv/``."""
        # Imported lazily so settings are only touched when a test runs.
        from django.conf import settings

        return os.path.join(settings.BASE_DIR, "data", "v2", "csv", filename)

    def _is_valid_identifier(self, identifier):
        """Return True when ``identifier`` consists solely of slug characters."""
        # fullmatch (not match) so a trailing newline cannot slip past ``$``.
        return self.VALID_IDENTIFIER_PATTERN.fullmatch(identifier) is not None

    def _collect_file_violations(self, filename, identifier_column):
        """Return the violation records found in a single CSV file.

        Each record is a dict with the keys ``file``, ``row``, ``id`` and
        ``identifier``. A missing column or a file that cannot be read is
        reported as a record as well, so one bad file never hides the
        results for the others.
        """
        violations = []
        csv_path = self.get_csv_path(filename)

        try:
            # newline="" is what the csv module requires so that quoted
            # fields containing line breaks are parsed correctly.
            with open(csv_path, "r", encoding="utf-8", newline="") as csvfile:
                reader = csv.DictReader(csvfile)

                # fieldnames is None for an empty file; treat as "no columns".
                if identifier_column not in (reader.fieldnames or ()):
                    violations.append(
                        {
                            "file": filename,
                            "row": "N/A",
                            "id": "N/A",
                            "identifier": f"Column '{identifier_column}' not found",
                        }
                    )
                    return violations

                # Records are numbered from 2 because the header is row 1.
                for row_num, row in enumerate(reader, start=2):
                    # DictReader fills missing trailing fields with None, so
                    # the value must be guarded before calling strip().
                    identifier = (row.get(identifier_column) or "").strip()

                    # Empty identifiers are deliberately not reported.
                    if not identifier:
                        continue

                    if not self._is_valid_identifier(identifier):
                        violations.append(
                            {
                                "file": filename,
                                "row": row_num,
                                "id": row.get("id", "N/A"),
                                "identifier": identifier,
                            }
                        )
        except (OSError, UnicodeError, csv.Error) as e:
            # Only genuine I/O, decoding and CSV parsing failures are turned
            # into a report entry; programming errors propagate normally.
            violations.append(
                {
                    "file": filename,
                    "row": "N/A",
                    "id": "N/A",
                    "identifier": f"Error reading file: {str(e)}",
                }
            )

        return violations

    def _build_failure_message(self, violations, missing_files):
        """Format missing files and violation records into one failure message."""
        error_lines = []

        # Report missing files first
        if missing_files:
            error_lines.append("\n\nMissing CSV files:")
            error_lines.extend(f" - {filename}" for filename in missing_files)
            error_lines.append(
                "\nAll CSV files listed in CSV_FILES_TO_VALIDATE must exist."
            )

        # Report violations
        if violations:
            error_lines.append(
                "\n\nFound {} resource(s) with invalid identifiers (not ASCII slugs):".format(
                    len(violations)
                )
            )
            error_lines.append("\nIdentifiers must match pattern: ^[a-z0-9-]+$")
            error_lines.append("\nInvalid identifiers found in CSV files:")
            error_lines.extend(
                " - {file} (row {row}, id={id}): {identifier}".format(**v)
                for v in violations
            )
            error_lines.append(
                "\nThese identifiers contain invalid characters and must be normalized."
            )
            error_lines.append(
                "Update the CSV files in data/v2/csv/ to fix these identifiers."
            )
            error_lines.append("\nSuggested fixes:")
            error_lines.append(
                " - Remove Unicode apostrophes (') and replace with regular hyphens or remove"
            )
            error_lines.append(" - Remove Unicode letters (ñ → n)")
            error_lines.append(
                " - Remove parentheses and other special characters"
            )
            error_lines.append(" - Convert to lowercase")

        return "\n".join(error_lines)

    def test_all_csv_identifiers_are_ascii_slugs(self):
        """
        Validate every identifier in the files of CSV_FILES_TO_VALIDATE.

        Identifiers may only contain:
        - Lowercase letters (a-z)
        - Numbers (0-9)
        - Hyphens (-)

        The test fails, listing every problem at once, if any file is
        missing, lacks its identifier column, cannot be read, or contains an
        identifier with non-ASCII characters (e.g. ñ, é, typographic
        apostrophes), uppercase letters, spaces, or other special characters
        (&, parentheses, apostrophes, ...).
        """
        violations = []
        missing_files = []

        for filename, identifier_column in self.CSV_FILES_TO_VALIDATE:
            # Missing files are collected and reported together at the end.
            if not os.path.exists(self.get_csv_path(filename)):
                missing_files.append(filename)
                continue

            violations.extend(
                self._collect_file_violations(filename, identifier_column)
            )

        if violations or missing_files:
            self.fail(self._build_failure_message(violations, missing_files))

    def test_identifier_pattern_examples(self):
        """Check the validation pattern against known good and bad identifiers."""
        valid_identifiers = [
            "pikachu",
            "charizard-mega-x",
            "mr-mime",
            "ho-oh",
            "type-null",
            "item-123",
            "mega-stone",
        ]

        # subTest lets every failing example be reported, not just the first.
        for identifier in valid_identifiers:
            with self.subTest(identifier=identifier):
                self.assertTrue(
                    self._is_valid_identifier(identifier),
                    f"{identifier} should be valid but was rejected",
                )

        invalid_identifiers = [
            "Pikachu",  # Uppercase
            "Mr. Mime",  # Space and period
            "kofu\u2019s-wallet",  # Unicode apostrophe (U+2019)
            "jalapeño",  # Unicode ñ
            "steel-bottle-(r)",  # Parentheses
            "b&w-grass-tablecloth",  # Ampersand
            "farfetch'd",  # Apostrophe
            "kofu's-wallet",  # Regular apostrophe
            "pikachu\n",  # Trailing newline must not slip past the `$` anchor
        ]

        for identifier in invalid_identifiers:
            with self.subTest(identifier=identifier):
                self.assertFalse(
                    self._is_valid_identifier(identifier),
                    f"{identifier} should be invalid but was accepted",
                )

0 commit comments

Comments
 (0)