-
Notifications
You must be signed in to change notification settings - Fork 5.3k
Expand file tree
/
Copy pathpointblank_atoms.py
More file actions
43 lines (34 loc) · 1.16 KB
/
pointblank_atoms.py
File metadata and controls
43 lines (34 loc) · 1.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "pointblank[pl]",
# ]
# ///
import polars as pl
import pointblank as pb
# Values allowed in the "symbol" column; main() passes this list to
# col_vals_in_set, so any other symbol counts as a failing row.
VALID_ELEMENTS = ["Cu", "Pt"]
def main() -> None:
    """Validate the parsed atoms table and report which rows may be re-exported.

    Loads ``pointblank_atoms.csv`` (resolved against the current working
    directory), builds a Pointblank validation plan, interrogates it, and
    then splits the table into its "pass" and "fail" pieces. The row count
    of each piece and a four-column preview are printed to stdout.

    Checks in the plan, in order:
      1. ``symbol`` is one of ``VALID_ELEMENTS``.
      2. ``x``, ``y`` and ``z`` are not null.
      3. ``x``, ``y`` and ``z`` lie between 0 and 20.
      4. ``fx`` lies between -1000 and 1000.
    """
    frame = pl.read_csv("pointblank_atoms.csv")

    # Threshold levels are attached to the plan as a whole, so every step
    # added below is interrogated against the same warning/error/critical
    # settings.
    levels = pb.Thresholds(warning=0.02, error=0.05, critical=0.07)
    plan = pb.Validate(
        data=frame,
        tbl_name="atoms_from_parser",
        label="Round-trip validation before re-export",
        thresholds=levels,
    )

    # Add the steps one at a time; each call hands back the plan so the
    # order of steps is the same as in a single chained expression.
    plan = plan.col_vals_in_set(columns="symbol", set=VALID_ELEMENTS)
    plan = plan.col_vals_not_null(columns=["x", "y", "z"])
    plan = plan.col_vals_between(columns=["x", "y", "z"], left=0, right=20)
    plan = plan.col_vals_between(columns="fx", left=-1000, right=1000)
    report = plan.interrogate()

    # Sunder the table: one piece for type="pass", one for type="fail".
    passed = report.get_sundered_data(type="pass")
    failed = report.get_sundered_data(type="fail")

    print(f"Safe to re-export: {len(passed)} rows")
    print(f"Needs review: {len(failed)} rows")

    # Same preview columns for both pieces, clean rows first.
    for heading, subset in (("\nClean rows", passed), ("\nDirty rows", failed)):
        print(heading)
        print(subset.select(["atom_id", "symbol", "x", "fx"]))
# Run the validation only when this file is executed as a script, so that
# importing the module does not read the CSV or print anything.
if __name__ == "__main__":
    main()