import argparse
import bisect
import csv
import unicodedata
from dataclasses import dataclass

# NFKD has no decomposition for the Polish "l with stroke", so it is mapped by hand.
_EXTRA_MAP = str.maketrans({"ł": "l", "Ł": "L"})


@dataclass(frozen=True)
class City:
    """One locality row read from SIMC, plus the voivodeship name resolved via TERC."""

    sym: str  # SYM column; also the key under which load_simc stores the record
    name: str  # NAZWA column
    woj_code: str  # WOJ column
    woj_name: str  # name looked up by woj_code; "nieznane" when the code is unknown
    pow_code: str  # POW column
    gmi_code: str  # GMI column
    rodz_gmi: str  # RODZ_GMI column
    rm: str  # RM column
    mz: str  # MZ column
    sympod: str  # SYMPOD column


@dataclass(frozen=True)
class CityGroup:
    """A set of City records presented under a single label."""

    label: str
    woj_code: str
    woj_name: str
    pow_code: str
    members: tuple[City, ...]


def normalize(text: str) -> str:
    """Return *text* lower-cased and stripped of diacritics.

    "ł"/"Ł" are replaced explicitly, the rest is NFKD-decomposed and every
    combining mark is dropped, e.g. ``normalize("Łódź") == "lodz"``.
    """
    text = text.translate(_EXTRA_MAP)
    text = unicodedata.normalize("NFKD", text.lower())
    return "".join(c for c in text if not unicodedata.combining(c))


def _clean_row(row: dict) -> dict[str, str]:
    """Return *row* with every cell stripped and missing cells turned into "".

    csv.DictReader fills the cells of a too-short row with None and collects
    surplus cells under the key None; the former become empty strings and the
    latter are dropped, so callers can index columns without None checks.
    """
    return {key: (value or "").strip() for key, value in row.items() if key is not None}


def load_terc(
    path: str, encoding: str = "utf-8-sig"
) -> tuple[dict[str, str], dict[tuple[str, str, str, str], str]]:
    """Load voivodeship names and unit names from a semicolon-separated TERC file.

    Args:
        path: Path to the CSV file; it must have the columns WOJ, POW, GMI,
            RODZ and NAZWA.
        encoding: Text encoding used to open the file.

    Returns:
        A pair ``(woj_names, terc_units)``:
        ``woj_names`` maps WOJ to NAZWA for rows that have neither POW nor GMI;
        ``terc_units`` maps ``(WOJ, POW, GMI, RODZ)`` to NAZWA for rows where
        all four codes are present. Rows matching neither shape are ignored.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If one of the required columns is missing from the header.
    """
    woj_names: dict[str, str] = {}
    terc_units: dict[tuple[str, str, str, str], str] = {}
    with open(path, encoding=encoding, newline="") as f:
        for raw_row in csv.DictReader(f, delimiter=";"):
            row = _clean_row(raw_row)
            woj = row["WOJ"]
            pow_code = row["POW"]
            gmi_code = row["GMI"]
            rodz_code = row["RODZ"]
            name = row["NAZWA"]
            if not (woj and name):
                continue
            if not pow_code and not gmi_code:
                woj_names[woj] = name
            elif pow_code and gmi_code and rodz_code:
                terc_units[(woj, pow_code, gmi_code, rodz_code)] = name
    return woj_names, terc_units


def load_simc(path: str, woj_names: dict[str, str], encoding: str = "utf-8-sig") -> dict[str, City]:
    """Load localities from a semicolon-separated SIMC file.

    Args:
        path: Path to the CSV file; it must have the columns SYM, NAZWA, WOJ,
            POW, GMI, RODZ_GMI, RM, MZ and SYMPOD.
        woj_names: Mapping from WOJ code to voivodeship name, used to fill
            ``City.woj_name``; unknown codes get ``"nieznane"``.
        encoding: Text encoding used to open the file.

    Returns:
        A dict mapping SYM to its City. When a SYM occurs more than once the
        last row wins. Rows with an empty SYM are skipped.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If one of the required columns is missing from the header.
    """
    cities: dict[str, City] = {}
    with open(path, encoding=encoding, newline="") as f:
        for raw_row in csv.DictReader(f, delimiter=";"):
            row = _clean_row(raw_row)
            sym = row["SYM"]
            woj_code = row["WOJ"]
            city = City(
                sym=sym,
                name=row["NAZWA"],
                woj_code=woj_code,
                woj_name=woj_names.get(woj_code, "nieznane"),
                pow_code=row["POW"],
                gmi_code=row["GMI"],
                rodz_gmi=row["RODZ_GMI"],
                rm=row["RM"],
                mz=row["MZ"],
                sympod=row["SYMPOD"],
            )
            # A record without SYM cannot be looked up or linked to streets.
            if sym:
                cities[sym] = city
    return cities
def load_ulic(
    path: str,
    terc_units: dict[tuple[str, str, str, str], str],
    encoding: str = "utf-8-sig",
) -> dict[str, list[dict[str, str]]]:
    """Load streets from a semicolon-separated ULIC file, grouped by locality SYM.

    Args:
        path: Path to the CSV file; it must have the columns WOJ, POW, GMI,
            RODZ_GMI, SYM, SYM_UL, CECHA, NAZWA_1, NAZWA_2 and STAN_NA.
        terc_units: Mapping from ``(WOJ, POW, GMI, RODZ_GMI)`` to a unit name;
            the match is stored under ``"terc_unit_name"`` ("" when absent).
        encoding: Text encoding used to open the file.

    Returns:
        A dict mapping SYM to the list of its street dicts, in file order.
        Each street dict has the keys woj, pow, gmi, rodz_gmi, sym, sym_ul,
        full, cecha, nazwa_1, nazwa_2, stan_na and terc_unit_name. Rows with
        an empty SYM are skipped.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If one of the required columns is missing from the header.
    """
    streets_by_sym: dict[str, list[dict[str, str]]] = {}
    with open(path, encoding=encoding, newline="") as f:
        for raw_row in csv.DictReader(f, delimiter=";"):
            # DictReader fills the cells of a too-short row with None and puts
            # surplus cells under the key None; normalise both away.
            row = {key: (value or "").strip() for key, value in raw_row.items() if key is not None}
            sym = row["SYM"]
            woj, pow_code, gmi, rodz_gmi = row["WOJ"], row["POW"], row["GMI"], row["RODZ_GMI"]
            cecha, nazwa_1, nazwa_2 = row["CECHA"], row["NAZWA_1"], row["NAZWA_2"]
            street = {
                "woj": woj,
                "pow": pow_code,
                "gmi": gmi,
                "rodz_gmi": rodz_gmi,
                "sym": sym,
                "sym_ul": row["SYM_UL"],
                "full": " ".join(part for part in (cecha, nazwa_1, nazwa_2) if part),
                "cecha": cecha,
                "nazwa_1": nazwa_1,
                "nazwa_2": nazwa_2,
                "stan_na": row["STAN_NA"],
                "terc_unit_name": terc_units.get((woj, pow_code, gmi, rodz_gmi), ""),
            }
            # A street without a locality SYM cannot be reached from any city.
            if sym:
                streets_by_sym.setdefault(sym, []).append(street)
    return streets_by_sym


def format_street(
    street: dict[str, str],
    city_name_basic: str = "",
    city_name_precise: str = "",
) -> str:
    """Render one street dict as a single ``key=value | key=value`` line.

    Args:
        street: Street dict as produced by load_ulic, optionally enriched with
            ``city_name_basic`` / ``city_name_precise``.
        city_name_basic: Locality name to print; falls back to
            ``street["city_name_basic"]``.
        city_name_precise: More specific name to print; falls back to
            ``street["city_name_precise"]`` and then ``street["terc_unit_name"]``.

    Returns:
        The formatted line. The ``miejscowosc_precyzyjna`` field is emitted
        only when a precise name exists and differs (after normalize) from the
        basic name.
    """
    basic_name = city_name_basic or street.get("city_name_basic", "")
    precise_name = (
        city_name_precise
        or street.get("city_name_precise", "")
        or street.get("terc_unit_name", "")
    )
    # Repeating the same name twice adds nothing, so the precise one is dropped.
    if precise_name and basic_name and normalize(precise_name) == normalize(basic_name):
        precise_name = ""

    parts = [
        f"full={street['full']}",
        f"sym_ul={street['sym_ul']}",
        f"cecha={street['cecha']}",
        f"nazwa_1={street['nazwa_1']}",
        f"nazwa_2={street['nazwa_2']}",
        f"woj={street['woj']}",
        f"pow={street['pow']}",
        f"gmi={street['gmi']}",
        f"rodz_gmi={street['rodz_gmi']}",
        f"sym={street['sym']}",
        f"miejscowosc={basic_name}",
    ]
    if precise_name:
        parts.append(f"miejscowosc_precyzyjna={precise_name}")
    parts.append(f"stan_na={street['stan_na']}")
    return " | ".join(parts)


def _normalize_query(query: str) -> str:
    """Normalize a user query and collapse every whitespace run to one space."""
    return " ".join(normalize(query).split())


def _iter_prefix_matches(index: list[tuple], keys: list[str], prefix: str):
    """Yield the payloads of *index* whose key starts with *prefix*, in index order.

    *keys* must be the sorted key column of *index*. In a sorted list all
    strings sharing a prefix are contiguous and begin at ``bisect_left``, so
    the scan stops at the first key that no longer matches. Being lazy, it
    costs nothing beyond what the caller actually consumes.
    """
    for pos in range(bisect.bisect_left(keys, prefix), len(keys)):
        if not keys[pos].startswith(prefix):
            return
        yield index[pos][1]


def build_city_index(cities: dict[str, City]) -> tuple[list[tuple[str, City]], list[str]]:
    """Build a sorted prefix index for locality search.

    Every word of a name is indexed on its own. Multi-word names are also
    indexed as the words glued together ("nowysacz") and as the full name with
    single spaces ("nowy sacz"), so a query typed with or without spaces
    matches.

    Returns:
        ``(index, keys)`` where ``index`` is a list of ``(normalized_key, City)``
        sorted by key and ``keys`` is the parallel list of keys for bisect.
    """
    index: list[tuple[str, City]] = []
    for city in cities.values():
        words = city.name.split()
        entry_keys = [normalize(word) for word in words]
        if len(words) > 1:
            entry_keys.append(normalize("".join(words)))
            entry_keys.append(normalize(" ".join(words)))
        index.extend((key, city) for key in entry_keys)
    # list.sort is stable: entries with equal keys keep the input order of cities.
    index.sort(key=lambda item: item[0])
    keys = [key for key, _ in index]
    return index, keys


def search_cities(
    query: str, city_index: list[tuple[str, City]], city_keys: list[str], limit: int = 10
) -> list[City]:
    """Return up to *limit* distinct localities having an index key that starts with *query*.

    Args:
        query: Raw user text; it is normalized before matching.
        city_index: Index from build_city_index.
        city_keys: Key list from build_city_index.
        limit: Maximum number of results; values <= 0 yield an empty list.

    Returns:
        Cities in index order, de-duplicated by SYM. Empty when the normalized
        query is shorter than 2 characters.
    """
    query_norm = _normalize_query(query)
    if len(query_norm) < 2 or limit <= 0:
        return []
    seen: dict[str, City] = {}
    for city in _iter_prefix_matches(city_index, city_keys, query_norm):
        if city.sym in seen:
            continue
        seen[city.sym] = city
        if len(seen) >= limit:
            break
    return list(seen.values())


def build_street_index(
    streets: list[dict[str, str]],
) -> tuple[list[tuple[str, dict[str, str]]], list[str]]:
    """Build a sorted prefix index for street search within one locality.

    Each street is indexed by ``cecha``, ``nazwa_1`` and ``nazwa_2``, by every
    single word of the two name parts, and - when both name parts exist - by
    the parts glued together and joined with a space in both orders.

    Returns:
        ``(index, keys)`` where ``index`` is a list of ``(normalized_key, street)``
        sorted by key and ``keys`` is the parallel list of keys for bisect.
    """
    index: list[tuple[str, dict[str, str]]] = []
    for street in streets:
        cecha, nazwa_1, nazwa_2 = street["cecha"], street["nazwa_1"], street["nazwa_2"]
        candidates = [cecha, nazwa_1, nazwa_2]
        # Single words, so that a query for a later word of a multi-word name part matches.
        candidates.extend(word for part in (nazwa_1, nazwa_2) for word in part.split())
        if nazwa_1 and nazwa_2:
            candidates.append(nazwa_1 + nazwa_2)
            # Both word orders: the user may type either name part first.
            candidates.append(f"{nazwa_1} {nazwa_2}")
            candidates.append(f"{nazwa_2} {nazwa_1}")
        # dict.fromkeys de-duplicates while keeping a deterministic order.
        unique_keys = dict.fromkeys(_normalize_query(text) for text in candidates if text)
        index.extend((key, street) for key in unique_keys if key)
    index.sort(key=lambda item: item[0])
    keys = [key for key, _ in index]
    return index, keys


def search_streets(
    query: str,
    street_index: list[tuple[str, dict[str, str]]],
    street_keys: list[str],
    limit: int = 20,
) -> list[dict[str, str]]:
    """Return up to *limit* distinct streets having an index key that starts with *query*.

    Args:
        query: Raw user text; it is normalized before matching.
        street_index: Index from build_street_index.
        street_keys: Key list from build_street_index.
        limit: Maximum number of results; values <= 0 yield an empty list.

    Returns:
        Street dicts in index order, de-duplicated by ``sym_ul``. Empty when
        the normalized query is shorter than 2 characters.
    """
    query_norm = _normalize_query(query)
    if len(query_norm) < 2 or limit <= 0:
        return []
    seen: dict[str, dict[str, str]] = {}
    for street in _iter_prefix_matches(street_index, street_keys, query_norm):
        key = street["sym_ul"]
        if key in seen:
            continue
        seen[key] = street
        if len(seen) >= limit:
            break
    return list(seen.values())


def city_group_key(city: City) -> tuple[str, ...]:
    """Build the grouping key used to merge administrative parts of one city.

    * a record whose SYMPOD is set and differs from its SYM is keyed by SYMPOD;
    * otherwise a record with RM 95 or 98 and MZ "1" is keyed by normalized
      name, voivodeship and county;
    * every other record forms its own group, keyed by SYM.
    """
    # NOTE(review): a parent locality (sympod == sym) gets ("self", sym), so it
    # never shares a group with its own parts keyed ("parent", sympod) -
    # confirm this is intended.
    if city.sympod and city.sympod != city.sym:
        return ("parent", city.sympod)
    if city.rm in {"95", "98"} and city.mz == "1":
        return ("rm95_98", normalize(city.name), city.woj_code, city.pow_code)
    return ("self", city.sym)


def group_cities(cities: list[City]) -> list[CityGroup]:
    """Group *cities* by city_group_key.

    Returns:
        One CityGroup per distinct key, in order of first appearance. Members
        are sorted by SYM; the label and the voivodeship/county fields are
        taken from the member with the smallest SYM.
    """
    grouped: dict[tuple[str, ...], list[City]] = {}
    for city in cities:
        grouped.setdefault(city_group_key(city), []).append(city)

    groups: list[CityGroup] = []
    for members in grouped.values():
        sorted_members = sorted(members, key=lambda c: c.sym)
        first = sorted_members[0]
        groups.append(
            CityGroup(
                label=first.name,
                woj_code=first.woj_code,
                woj_name=first.woj_name,
                pow_code=first.pow_code,
                members=tuple(sorted_members),
            )
        )
    return groups


def merge_group_streets(
    group: CityGroup,
    streets_by_sym: dict[str, list[dict[str, str]]],
    include_precise_city_name: bool = False,
) -> list[dict[str, str]]:
    """Collect the streets of every member of *group*, de-duplicated by ``sym_ul``.

    Args:
        group: The group whose members' streets are merged.
        streets_by_sym: Mapping from locality SYM to its street dicts.
        include_precise_city_name: When true, each street is copied and gets
            ``city_name_basic`` (the member's name) and, if the street's
            ``terc_unit_name`` differs from it after normalize,
            ``city_name_precise``. When false the original dicts are returned.

    Returns:
        The merged street list; for a repeated ``sym_ul`` the first occurrence
        (in member order) wins.
    """
    merged: list[dict[str, str]] = []
    seen_sym_ul: set[str] = set()
    for city in group.members:
        for street in streets_by_sym.get(city.sym, []):
            sym_ul = street["sym_ul"]
            if sym_ul in seen_sym_ul:
                continue
            seen_sym_ul.add(sym_ul)
            if not include_precise_city_name:
                merged.append(street)
                continue
            precise_name = street.get("terc_unit_name") or city.name
            enriched = {**street, "city_name_basic": city.name}
            if normalize(precise_name) != normalize(city.name):
                enriched["city_name_precise"] = precise_name
            merged.append(enriched)
    return merged


def city_match_rank(query_norm: str, label: str) -> tuple[int, str]:
    """Rate how well *label* matches an already-normalized query.

    Returns:
        ``(level, normalized_label)`` where level is 0 for an exact match,
        1 when the label starts with the query, 2 when any word of the label
        starts with it, 3 when the query occurs anywhere in the label and
        4 otherwise. Lower is better.
    """
    label_norm = normalize(label)
    if label_norm == query_norm:
        match_level = 0
    elif label_norm.startswith(query_norm):
        match_level = 1
    elif any(word.startswith(query_norm) for word in label_norm.split()):
        match_level = 2
    elif query_norm in label_norm:
        match_level = 3
    else:
        match_level = 4
    return match_level, label_norm


def print_stats(
    woj_names: dict[str, str],
    cities: dict[str, City],
    streets_by_sym: dict[str, list[dict[str, str]]],
) -> None:
    """Print the number of voivodeships, localities, localities with streets and streets."""
    streets_count = sum(len(items) for items in streets_by_sym.values())
    print(f"Wojewodztwa (TERC): {len(woj_names)}")
    print(f"Miejscowosci (SIMC): {len(cities)}")
    print(f"Miejscowosci z ulicami (ULIC): {len(streets_by_sym)}")
    print(f"Lacznie ulic (ULIC): {streets_count}")


def run_search_city(
    query: str, city_index: list[tuple[str, City]], city_keys: list[str], limit: int
) -> int:
    """Print the localities matching *query*; return 0 on success, 1 when nothing matched."""
    results = search_cities(query, city_index, city_keys, limit=limit)
    if not results:
        print("Brak wynikow.")
        return 1
    for city in results:
        print(
            f"{city.name} | sym={city.sym} | woj={city.woj_code} ({city.woj_name}) "
            f"| pow={city.pow_code} | gmi={city.gmi_code}"
        )
    return 0


def run_list_streets(
    city_sym: str,
    streets_by_sym: dict[str, list[dict[str, str]]],
    cities_by_sym: dict[str, City],
    limit: int,
) -> int:
    """Print up to *limit* streets of the locality *city_sym*.

    Returns:
        0 when the locality has streets, 1 otherwise. When more streets exist
        than were printed, a trailing line reports how many were left out.
    """
    streets = streets_by_sym.get(city_sym, [])
    if not streets:
        print(f"Brak ulic dla SYM={city_sym}.")
        return 1

    # A negative limit would slice from the end and skew the remainder count.
    limit = max(limit, 0)
    city = cities_by_sym.get(city_sym)
    city_name = city.name if city is not None else ""
    for street in streets[:limit]:
        precise_name = street.get("terc_unit_name") or city_name
        print(format_street(street, city_name_basic=city_name, city_name_precise=precise_name))
    if len(streets) > limit:
        print(f"... i jeszcze {len(streets) - limit} ulic")
    return 0


def _parse_choice(raw: str, count: int) -> int | None:
    """Return the list index typed by the user, or None when *raw* is not in ``range(count)``."""
    # isdecimal() rather than isdigit(): isdigit() also accepts characters such
    # as "²", which int() rejects with ValueError.
    if not raw.isdecimal():
        return None
    try:
        index = int(raw)
    except ValueError:  # e.g. a digit string longer than the int conversion limit
        return None
    return index if index < count else None


def _rank_city_groups(
    query_norm: str,
    groups: list[CityGroup],
    streets_by_sym: dict[str, list[dict[str, str]]],
    limit: int,
) -> list[tuple[CityGroup, int]]:
    """Order *groups* by match level, street count (descending) and name; keep the first *limit*.

    Returns:
        ``(group, street_count)`` pairs, best match first.
    """
    scored = []
    for group in groups:
        streets_count = len(merge_group_streets(group, streets_by_sym))
        match_level, label_norm = city_match_rank(query_norm, group.label)
        scored.append((match_level, -streets_count, label_norm, group, streets_count))
    scored.sort(key=lambda item: item[:3])
    return [(group, streets_count) for *_, group, streets_count in scored[: max(limit, 0)]]


def _browse_streets(city_streets: list[dict[str, str]], street_limit: int) -> bool:
    """Run the street prompt for one city.

    Returns:
        True when the user ended the whole session (EOF or Ctrl-C), False when
        an empty line asked to go back to the city prompt.
    """
    street_index, street_keys = build_street_index(city_streets)
    print("Wpisz fragment nazwy ulicy (min. 2 znaki), pusta linia = wybor innego miasta.\n")
    while True:
        try:
            street_query = input("ulica >>> ").strip()
        except (EOFError, KeyboardInterrupt):
            print("\nKoniec.")
            return True
        if not street_query:
            print()
            return False
        if len(_normalize_query(street_query)) < 2:
            print("Podaj co najmniej 2 znaki.\n")
            continue
        found = search_streets(street_query, street_index, street_keys, limit=street_limit)
        if not found:
            print("Brak ulic.\n")
            continue
        for street in found:
            print(f"- {format_street(street)}")
        print()


def run_interactive(
    city_index: list[tuple[str, City]],
    city_keys: list[str],
    streets_by_sym: dict[str, list[dict[str, str]]],
    cities_by_sym: dict[str, City],
    city_limit: int,
    street_limit: int,
) -> int:
    """Run the interactive loop: pick a city, then search its streets.

    Args:
        city_index: Index from build_city_index.
        city_keys: Key list from build_city_index.
        streets_by_sym: Mapping from locality SYM to its street dicts.
        cities_by_sym: Mapping from SYM to City (kept for interface
            compatibility; not read here).
        city_limit: Maximum number of city groups offered for selection.
        street_limit: Maximum number of streets printed per street query.

    Returns:
        Always 0; the loop ends on an empty city query, EOF or Ctrl-C.
    """
    print("Tryb interaktywny.")
    print("Wpisz nazwe miejscowosci (min. 2 znaki), pusta linia = wyjscie.\n")

    while True:
        try:
            city_query = input("miasto >>> ").strip()
        except (EOFError, KeyboardInterrupt):
            print("\nKoniec.")
            return 0

        if not city_query:
            print("Koniec.")
            return 0

        query_norm = _normalize_query(city_query)
        if len(query_norm) < 2:
            print("Podaj co najmniej 2 znaki.\n")
            continue

        # Over-fetch: several raw matches may collapse into one group.
        city_matches = search_cities(city_query, city_index, city_keys, limit=city_limit * 5)
        ranked_groups = _rank_city_groups(
            query_norm, group_cities(city_matches), streets_by_sym, city_limit
        )
        if not ranked_groups:
            print("Brak wynikow.\n")
            continue

        for idx, (group, streets_count) in enumerate(ranked_groups):
            members_suffix = ""
            if len(group.members) > 1:
                members_suffix = f" | scalone_sym={len(group.members)}"
            print(
                f"[{idx}] {group.label} | woj={group.woj_code} ({group.woj_name}) | pow={group.pow_code} "
                f"| ulice={streets_count}{members_suffix}"
            )

        selected: CityGroup | None = None
        while selected is None:
            try:
                choice = input("wybierz numer miasta (Enter = anuluj): ").strip()
            except (EOFError, KeyboardInterrupt):
                print("\nKoniec.")
                return 0
            if not choice:
                print()
                break
            chosen_index = _parse_choice(choice, len(ranked_groups))
            if chosen_index is None:
                print("Niepoprawny wybor.")
                continue
            selected = ranked_groups[chosen_index][0]

        if selected is None:
            continue

        city_streets = merge_group_streets(selected, streets_by_sym, include_precise_city_name=True)
        merged_syms = ",".join(city.sym for city in selected.members)
        print(f"\nWybrane miasto: {selected.label}")
        print(f"SYM w grupie: {merged_syms}")
        print(f"Ulic w miejscowosci: {len(city_streets)}")
        if not city_streets:
            print("Brak ulic dla tej miejscowosci.\n")
            continue

        if _browse_streets(city_streets, street_limit):
            return 0


def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser with the stats, miasto, ulice and interaktywne commands."""
    parser = argparse.ArgumentParser(
        description="CLI do ladowania TERC/SIMC/ULIC oraz wyszukiwania miejscowosci i ulic"
    )
    parser.add_argument("--terc", default="TERC.csv", help="Sciezka do TERC.csv")
    parser.add_argument("--simc", default="SIMC.csv", help="Sciezka do SIMC.csv")
    parser.add_argument("--ulic", default="ULIC.csv", help="Sciezka do ULIC.csv")

    subparsers = parser.add_subparsers(dest="command", required=True)

    subparsers.add_parser("stats", help="Pokaz statystyki zaladowanych danych")

    city_parser = subparsers.add_parser("miasto", help="Szukaj miejscowosci po fragmencie nazwy")
    city_parser.add_argument("query", help="Fraza do wyszukania")
    city_parser.add_argument("--limit", type=int, default=10, help="Maksymalna liczba wynikow")

    streets_parser = subparsers.add_parser("ulice", help="Pokaz ulice dla SYM miejscowosci")
    streets_parser.add_argument("sym", help="Kod SYM miejscowosci")
    streets_parser.add_argument("--limit", type=int, default=30, help="Maksymalna liczba ulic")

    interactive_parser = subparsers.add_parser(
        "interaktywne", help="Tryb interaktywny: miasto -> ulice"
    )
    interactive_parser.add_argument(
        "--city-limit", type=int, default=10, help="Maksymalna liczba miast do wyboru"
    )
    interactive_parser.add_argument(
        "--street-limit", type=int, default=20, help="Maksymalna liczba wynikow ulic"
    )
    return parser


def main() -> int:
    """Parse the command line, load the data files and run the chosen command.

    Returns:
        The process exit code: the value returned by the selected ``run_*``
        function, 0 for ``stats``, or 1 when no known command was given.
    """
    parser = build_parser()
    args = parser.parse_args()

    woj_names, terc_units = load_terc(args.terc)
    cities = load_simc(args.simc, woj_names)

    if args.command == "miasto":
        # City search never touches street data, so ULIC is not read at all.
        city_index, city_keys = build_city_index(cities)
        return run_search_city(args.query, city_index, city_keys, limit=args.limit)

    streets_by_sym = load_ulic(args.ulic, terc_units)

    if args.command == "stats":
        print_stats(woj_names, cities, streets_by_sym)
        return 0

    if args.command == "ulice":
        return run_list_streets(args.sym, streets_by_sym, cities, limit=args.limit)

    if args.command == "interaktywne":
        city_index, city_keys = build_city_index(cities)
        return run_interactive(
            city_index,
            city_keys,
            streets_by_sym,
            cities,
            city_limit=args.city_limit,
            street_limit=args.street_limit,
        )

    parser.print_help()
    return 1


if __name__ == "__main__":
    raise SystemExit(main())