BA-translator/dumpdbschema.py

#!/usr/bin/env python3
"""
Database Schema Dumper
This module parses a decompiled C# type dump to extract repository and database
schema information, writing a mapping file that connects each repository to its
corresponding database table and schema classes.
"""
import json
import os
import re
from collections import OrderedDict
from tqdm import tqdm
# Configuration
CSHARP_FILE = 'types.cs'
OUTPUT_MAP_FILE = 'repository_map.json'


def parse_csharp_files():
    """Parse the C# file to extract repository and database schema information.

    Returns:
        tuple: (repositories_dict, db_schemas_dict) containing the parsed
        information, or (None, None) if the input file is missing.
    """
    if not os.path.exists(CSHARP_FILE):
        print(f"ERROR: File '{CSHARP_FILE}' not found.")
        return None, None

    # Regular expressions for parsing
    repo_pattern = re.compile(
        r'public class (\w+)\s*:\s*BaseExcelRepository<[^,]+,\s*([^,]+),\s*([^>]+)>'
    )
    db_schema_pattern = re.compile(r'public class (\w+)\s*:\s*BaseDBSchema')
    prop_pattern = re.compile(r'public\s+([\w.<>\[\]?]+)\s+(\w+)\s*{\s*get;\s*set;\s*}')
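
    # Illustrative only: hypothetical C# declarations of the shape these patterns
    # are written to match (actual class names come from the decompiled types.cs):
    #
    #   public class ItemExcelRepository : BaseExcelRepository<long, ItemDBSchema, ItemExcel> { ... }
    #   public class ItemDBSchema : BaseDBSchema
    #   {
    #       public long Key { get; set; }
    #       public byte[] Bytes { get; set; }
    #   }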

    db_schemas = OrderedDict()
    repositories = OrderedDict()
    current_db_schema = None

    print(f"Analyzing '{CSHARP_FILE}' to create repository mapping...")
    with open(CSHARP_FILE, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    for line in tqdm(lines, desc="Parsing C# file"):
        # Remove comments
        line = line.strip().split('//')[0]

        # Look for repository definitions
        repo_match = repo_pattern.search(line)
        if repo_match:
            repo_name = repo_match.group(1)
            db_schema_class = repo_match.group(2).strip()
            blob_schema_class = repo_match.group(3).strip()
            repositories[repo_name] = {
                'db_schema_class': db_schema_class,
                'blob_schema_class': blob_schema_class
            }
            continue

        # Look for database schema definitions
        db_schema_match = db_schema_pattern.search(line)
        if db_schema_match:
            current_db_schema = db_schema_match.group(1)
            db_schemas[current_db_schema] = []
            continue

        # If inside a database schema definition, look for properties
        if current_db_schema:
            prop_match = prop_pattern.search(line)
            if prop_match:
                prop_type = prop_match.group(1)
                prop_name = prop_match.group(2)
                db_schemas[current_db_schema].append({
                    'name': prop_name,
                    'type': prop_type
                })

        # End of class definition
        if '}' in line and '{' not in line:
            current_db_schema = None

    print(f"\nFound {len(repositories)} repositories and {len(db_schemas)} database schemas.")
    return repositories, db_schemas


def main():
    """Main function to create the repository mapping file."""
    repos, schemas = parse_csharp_files()
    if not repos:
        print("No repositories found. Nothing to save.")
        return

    # Combine the information into the final mapping
    final_map = OrderedDict()
    for repo_name, repo_data in repos.items():
        db_schema_name = repo_data['db_schema_class']
        # The database table name is the database schema class name
        table_name = db_schema_name
        # All properties parsed for this schema are recorded as its key columns
        key_columns = schemas.get(db_schema_name, [])
        final_map[repo_name] = {
            'table_name': table_name,
            'key_columns': key_columns,
            'blob_schema_class': repo_data['blob_schema_class']
        }
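
    # For illustration only: a hypothetical entry of the resulting map, using the
    # same made-up names as the C# sketch above (real names come from types.cs):
    #
    #   "ItemExcelRepository": {
    #     "table_name": "ItemDBSchema",
    #     "key_columns": [
    #       {"name": "Key", "type": "long"},
    #       {"name": "Bytes", "type": "byte[]"}
    #     ],
    #     "blob_schema_class": "ItemExcel"
    #   }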
print(f"Saving repository mapping to '{OUTPUT_MAP_FILE}'...")
with open(OUTPUT_MAP_FILE, 'w', encoding='utf-8') as f:
json.dump(final_map, f, indent=2, ensure_ascii=False)
print("Done! Repository mapping created successfully.")
print(f"You can now use '{OUTPUT_MAP_FILE}' as the source of truth for database operations.")
if __name__ == "__main__":
main()