BA-translator/dumpdbschema.py

#!/usr/bin/env python3
"""
Database Schema Dumper
This module parses a decompiled C# type dump to extract repository and database
schema information, writing a mapping file that connects each repository to its
corresponding database table and schema classes.
"""
import json
import os
import re
from collections import OrderedDict
from tqdm import tqdm
# Configuration
CSHARP_FILE = 'types.cs'
OUTPUT_MAP_FILE = 'repository_map.json'


def parse_csharp_files():
    """Parse the C# file to extract repository and database schema information.

    Returns:
        tuple: (repositories_dict, db_schemas_dict) containing the parsed
        information, or (None, None) if the input file is missing.
    """
    if not os.path.exists(CSHARP_FILE):
        print(f"ERROR: File '{CSHARP_FILE}' not found.")
        return None, None

    # Regular expressions for parsing
    repo_pattern = re.compile(
        r'public class (\w+)\s*:\s*BaseExcelRepository<[^,]+,\s*([^,]+),\s*([^>]+)>'
    )
    db_schema_pattern = re.compile(r'public class (\w+)\s*:\s*BaseDBSchema')
    prop_pattern = re.compile(r'public\s+([\w.<>\[\]?]+)\s+(\w+)\s*{\s*get;\s*set;\s*}')
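
    # Illustrative only: hypothetical C# declarations of the shape these patterns
    # are written to match (actual class names come from the decompiled types.cs):
    #
    #   public class ItemExcelRepository : BaseExcelRepository<long, ItemDBSchema, ItemExcel> { ... }
    #   public class ItemDBSchema : BaseDBSchema
    #   {
    #       public long Key { get; set; }
    #       public byte[] Bytes { get; set; }
    #   }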

    db_schemas = OrderedDict()
    repositories = OrderedDict()
    current_db_schema = None

    print(f"Analyzing '{CSHARP_FILE}' to create repository mapping...")
    with open(CSHARP_FILE, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    for line in tqdm(lines, desc="Parsing C# file"):
        # Remove comments
        line = line.strip().split('//')[0]

        # Look for repository definitions
        repo_match = repo_pattern.search(line)
        if repo_match:
            repo_name = repo_match.group(1)
            db_schema_class = repo_match.group(2).strip()
            blob_schema_class = repo_match.group(3).strip()
            repositories[repo_name] = {
                'db_schema_class': db_schema_class,
                'blob_schema_class': blob_schema_class
            }
            continue

        # Look for database schema definitions
        db_schema_match = db_schema_pattern.search(line)
        if db_schema_match:
            current_db_schema = db_schema_match.group(1)
            db_schemas[current_db_schema] = []
            continue

        # If inside a database schema definition, look for properties
        if current_db_schema:
            prop_match = prop_pattern.search(line)
            if prop_match:
                prop_type = prop_match.group(1)
                prop_name = prop_match.group(2)
                db_schemas[current_db_schema].append({
                    'name': prop_name,
                    'type': prop_type
                })

        # End of class definition
        if '}' in line and '{' not in line:
            current_db_schema = None

    print(f"\nFound {len(repositories)} repositories and {len(db_schemas)} database schemas.")
    return repositories, db_schemas


def main():
    """Main function to create the repository mapping file."""
    repos, schemas = parse_csharp_files()
    if not repos:
        print("No repositories found. Nothing to save.")
        return

    # Combine the information into the final mapping
    final_map = OrderedDict()
    for repo_name, repo_data in repos.items():
        db_schema_name = repo_data['db_schema_class']
        # The database table name is the database schema class name
        table_name = db_schema_name
        # All properties parsed for this schema are recorded as its key columns
        key_columns = schemas.get(db_schema_name, [])
        final_map[repo_name] = {
            'table_name': table_name,
            'key_columns': key_columns,
            'blob_schema_class': repo_data['blob_schema_class']
        }
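
    # For illustration only: a hypothetical entry of the resulting map, using the
    # same made-up names as the C# sketch above (real names come from types.cs):
    #
    #   "ItemExcelRepository": {
    #     "table_name": "ItemDBSchema",
    #     "key_columns": [
    #       {"name": "Key", "type": "long"},
    #       {"name": "Bytes", "type": "byte[]"}
    #     ],
    #     "blob_schema_class": "ItemExcel"
    #   }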
print(f"Saving repository mapping to '{OUTPUT_MAP_FILE}'...")
with open(OUTPUT_MAP_FILE, 'w', encoding='utf-8') as f:
json.dump(final_map, f, indent=2, ensure_ascii=False)
print("Done! Repository mapping created successfully.")
print(f"You can now use '{OUTPUT_MAP_FILE}' as the source of truth for database operations.")
if __name__ == "__main__":
main()