mirror of
https://github.com/Imp0ssibl33z/BA-translator.git
synced 2025-12-10 05:19:38 +05:00
382 lines
14 KiB
Python
382 lines
14 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
C# to FlatBuffers Schema Parser
|
|
|
|
This module parses C# files containing FlatBuffers object definitions and generates
|
|
corresponding .fbs schema files. Requires decompiled C# files to generate the schema.
|
|
"""
|
|
|
|
import os
import re
from collections import defaultdict, deque

from unidecode import unidecode
|
|
|
|
# Configuration: default input/output paths and the namespace assigned to
# definitions that were declared outside any C# namespace.
INPUT_CSHARP_FILE = 'types.cs'
OUTPUT_FBS_FILE = 'generated_schema.fbs'
DEFAULT_NAMESPACE = 'FlatData'

# Type mapping from C# primitive type names to FlatBuffers scalar type names.
CSHARP_TO_FBS_TYPE_MAP = {
    'long': 'long',
    'ulong': 'ulong',
    'int': 'int',
    'uint': 'uint',
    'short': 'short',
    'ushort': 'ushort',
    'float': 'float',
    'double': 'double',
    'bool': 'bool',
    'string': 'string',
    # C# `byte` is unsigned and `sbyte` is signed; the FlatBuffers names are
    # the other way round.
    'byte': 'ubyte',
    'sbyte': 'byte',
}
|
|
|
|
def sanitize_identifier(name):
    """Return *name* reduced to characters usable in a FlatBuffers identifier.

    The name is first transliterated with ``unidecode``; every character that
    is then not an ASCII letter, digit, underscore or dot is replaced by an
    underscore.
    """
    transliterated = unidecode(name)
    return re.sub(r'[^A-Za-z0-9_.]', '_', transliterated)
|
|
|
|
def pascal_to_snake_case(name):
    """Convert a PascalCase identifier to snake_case.

    An underscore is inserted between an uppercase run and a following
    capitalised word ("HTTPServer" -> "HTTP_Server"), then between a
    lowercase letter or digit and a following uppercase letter. Hyphens
    become underscores and the result is lower-cased.
    """
    boundary_patterns = (
        r'([A-Z]+)([A-Z][a-z])',
        r'([a-z\d])([A-Z])',
    )
    converted = name
    for pattern in boundary_patterns:
        converted = re.sub(pattern, r'\1_\2', converted)
    return converted.replace('-', '_').lower()
|
|
|
|
def csharp_to_fbs_type(csharp_type):
    """Translate a C# type name into its FlatBuffers type name.

    ``None`` yields ``'int'``. Otherwise every ``?`` is removed from the
    name; names found in ``CSHARP_TO_FBS_TYPE_MAP`` are translated through
    it, and any other name is returned after ``sanitize_identifier``.
    """
    if csharp_type is None:
        return 'int'

    # Drop nullable markers such as the trailing '?' in "int?".
    bare_type = csharp_type.replace('?', '')

    if bare_type not in CSHARP_TO_FBS_TYPE_MAP:
        # Not a known primitive: treat it as a custom (table/enum) type.
        return sanitize_identifier(bare_type)
    return CSHARP_TO_FBS_TYPE_MAP[bare_type]
|
|
|
|
|
|
def parse_csharp_file(input_file):
    """Parse a C# file and extract table and enum definitions.

    The file is scanned line by line (each line stripped of surrounding
    whitespace). A ``public struct X : IFlatbufferObject`` line opens a table
    and a ``public enum X`` line opens an enum; the next line starting with
    ``}`` closes whichever block is open and stores it.

    Args:
        input_file (str): Path to the C# input file (read as UTF-8).

    Returns:
        dict: Parsed definitions keyed by ``"<namespace>.<name>"``. Every
            value has ``'type'`` (``'table'`` or ``'enum'``), ``'ns'``,
            ``'name'`` and ``'fields'``; enums additionally have ``'base'``.
            Table fields are dicts with ``'original'`` and ``'type'`` keys,
            enum fields are ``"Name = value"`` strings. Definitions found
            outside any namespace are filed under ``DEFAULT_NAMESPACE``.
    """
    # Compile the patterns once instead of on every line.
    namespace_re = re.compile(r'namespace (\S+)')
    table_re = re.compile(r'public struct (\w+) : IFlatbufferObject')
    enum_re = re.compile(r'public enum (\w+)(?:\s*:\s*(\w+))?')
    enum_field_re = re.compile(r'(\w+)\s*=\s*(-?\d+)')
    vector_re = re.compile(
        r'public\s+(?:[^\s<]+<(\S+)>|(\S+))\s+(\w+)\s*\(int\s+\w+\)'
    )
    # A bare "ArraySegment<byte>" must be *captured* (here by the generic
    # group) so that the '[ubyte]' branch below can recognise it. A
    # non-capturing alternative leaves both type groups empty and the field
    # silently falls back to 'int'.
    property_re = re.compile(
        r'public\s+(?:Nullable<(\S+)>|(\S+))\s+(\w+)\s*{'
    )

    all_definitions = {}

    current_namespace = "_GLOBAL_"
    in_block = None  # None, 'table' or 'enum'
    current_name = None
    current_fields = []
    current_enum_base_type = 'int'
    seen_enum_values = set()

    with open(input_file, 'r', encoding='utf-8') as f:
        for raw_line in f:
            line = raw_line.strip()

            # Parse namespace declarations
            ns_match = namespace_re.match(line)
            if ns_match:
                current_namespace = ns_match.group(1).replace(';', '')
                continue

            # End of block: store the definition collected so far
            if line.startswith('}') and in_block:
                definition = {
                    'type': in_block,
                    'ns': current_namespace,
                    'name': current_name,
                }
                if in_block == 'enum':
                    definition['base'] = current_enum_base_type
                definition['fields'] = current_fields
                all_definitions[f"{current_namespace}.{current_name}"] = definition

                in_block = None
                current_fields = []
                continue

            # Parse struct/table definitions
            table_match = table_re.search(line)
            if table_match:
                in_block = 'table'
                current_name = sanitize_identifier(table_match.group(1))
                continue

            # Parse enum definitions (optional ": base_type" suffix)
            enum_match = enum_re.search(line)
            if enum_match:
                in_block = 'enum'
                current_name = sanitize_identifier(enum_match.group(1))
                current_enum_base_type = csharp_to_fbs_type(enum_match.group(2))
                seen_enum_values.clear()
                continue

            if not in_block:
                continue

            # Parse enum fields
            if in_block == 'enum':
                field_match = enum_field_re.match(line)
                if field_match:
                    field_name = sanitize_identifier(field_match.group(1))
                    field_value = int(field_match.group(2))

                    # Only the first name seen for a numeric value is kept.
                    if field_value not in seen_enum_values:
                        seen_enum_values.add(field_value)
                        current_fields.append(f'{field_name} = {field_value}')
                continue

            # Parse table fields: only public members are considered
            if not line.startswith('public'):
                continue

            # Vector accessors: methods taking a single int index
            vec_match = vector_re.search(line)
            if vec_match:
                csharp_type = vec_match.group(1) if vec_match.group(1) else vec_match.group(2)
                fbs_type = csharp_to_fbs_type(csharp_type)
                current_fields.append({
                    'original': sanitize_identifier(vec_match.group(3)),
                    'type': f'[{fbs_type}]'
                })
                continue

            # Property definitions
            prop_match = property_re.search(line)
            if not prop_match:
                continue

            nullable_type, full_type, csharp_name = prop_match.groups()
            csharp_type = nullable_type if nullable_type else full_type

            # Skip internal FlatBuffers fields
            if csharp_name == 'ByteBuffer' or csharp_name.endswith('Length'):
                continue

            # Determine field type
            if csharp_type == 'ArraySegment<byte>':
                field_type = '[ubyte]'
            else:
                field_type = csharp_to_fbs_type(csharp_type)

            current_fields.append({
                'original': sanitize_identifier(csharp_name),
                'type': field_type
            })

    # Move definitions declared outside any namespace into DEFAULT_NAMESPACE.
    # Iterate over a snapshot because the dict is modified inside the loop.
    for name, data in list(all_definitions.items()):
        if data['ns'] == "_GLOBAL_":
            new_name = f"{DEFAULT_NAMESPACE}.{data['name']}"
            all_definitions[new_name] = data
            data['ns'] = DEFAULT_NAMESPACE
            del all_definitions[name]

    return all_definitions
|
|
|
|
|
|
def find_full_type_name(base_type, current_ns, all_defs):
    """Resolve a type reference to the key it is stored under in *all_defs*.

    Lookup order: the current namespace, ``DEFAULT_NAMESPACE``, the bare
    name itself, and finally the first key (in iteration order) that ends
    with ``".<base_type>"``.

    Args:
        base_type (str): Type name to resolve.
        current_ns (str): Namespace of the definition that references it.
        all_defs (dict): Available definitions keyed by qualified name.

    Returns:
        str or None: The matching key, or None when nothing matches.
    """
    in_current_ns = f"{current_ns}.{base_type}"
    if in_current_ns in all_defs:
        return in_current_ns

    in_default_ns = f"{DEFAULT_NAMESPACE}.{base_type}"
    if in_default_ns in all_defs:
        return in_default_ns

    if base_type in all_defs:
        return base_type

    # Last resort: any namespace that contains a type with this simple name.
    suffix = f".{base_type}"
    return next((key for key in all_defs if key.endswith(suffix)), None)
|
|
|
|
def generate_fbs_schema(all_definitions, output_file):
    """Generate a FlatBuffers schema file from parsed definitions.

    All tables are emitted, together with every definition reachable from a
    table field; enums that no table references are dropped. Tables are
    ordered so that a table appears after the tables its fields reference;
    tables that cannot be ordered this way (reference cycles) are appended
    afterwards in their original parse order, which keeps the output
    identical from run to run.

    Args:
        all_definitions (dict): Parsed definitions keyed by qualified name,
            as returned by ``parse_csharp_file``.
        output_file (str): Path of the .fbs file to write (UTF-8).
    """
    # Step 1: Collect every type reachable from a table
    used_types = set()
    pending = deque(
        name for name, data in all_definitions.items() if data['type'] == 'table'
    )

    while pending:
        type_name = pending.popleft()
        if type_name in used_types or type_name not in all_definitions:
            continue

        used_types.add(type_name)
        data = all_definitions[type_name]

        # Only table fields carry type references; enum fields are strings.
        if data['type'] == 'table':
            for field in data['fields']:
                base_type = field['type'].strip('[]')
                found_dep = find_full_type_name(base_type, data['ns'], all_definitions)
                if found_dep and found_dep not in used_types:
                    pending.append(found_dep)

    final_definitions = {name: data for name, data in all_definitions.items() if name in used_types}

    # Step 2: Separate tables and enums
    tables = {name: data for name, data in final_definitions.items() if data['type'] == 'table'}
    enums = {name: data for name, data in final_definitions.items() if data['type'] == 'enum'}

    # Step 3: Topological sort so that referenced tables come first
    in_degree = {t: 0 for t in tables}
    adj = defaultdict(list)

    for name, data in tables.items():
        for field in data['fields']:
            base_type = field['type'].strip('[]')
            found_dep = find_full_type_name(base_type, data['ns'], tables)
            if found_dep:
                adj[found_dep].append(name)
                in_degree[name] += 1

    ready = deque(t for t in tables if in_degree[t] == 0)
    sorted_tables = []

    while ready:
        t = ready.popleft()
        sorted_tables.append(t)
        for neighbor in adj.get(t, []):
            in_degree[neighbor] -= 1
            if in_degree[neighbor] == 0:
                ready.append(neighbor)

    # Handle cycles: whatever was never released by the sort is part of (or
    # depends on) a cycle. Append in parse order rather than set order, which
    # varies between runs for strings.
    cyclic_tables = set(tables) - set(sorted_tables)
    sorted_tables.extend(t for t in tables if t in cyclic_tables)

    # Step 4: Group by namespace
    defs_by_ns = defaultdict(lambda: {'enums': [], 'tables': [], 'cycles': []})

    for data in enums.values():
        defs_by_ns[data['ns']]['enums'].append(data)

    for name in sorted_tables:
        data = tables[name]
        defs_by_ns[data['ns']]['tables'].append(data)
        if name in cyclic_tables:
            defs_by_ns[data['ns']]['cycles'].append(data['name'])

    # Step 5: Generate FlatBuffers schema file
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write('// Auto-generated FlatBuffers schema\n')
        f.write('// Field order is preserved. Key attributes are properly handled.\n\n')

        for ns, data in sorted(defs_by_ns.items()):
            f.write(f'// ----- NAMESPACE: {ns} -----\n')
            f.write(f'namespace {ns};\n\n')

            # Forward declarations for circular dependencies
            # NOTE(review): confirm that flatc accepts a bodiless "table X;"
            # declaration; behaviour is kept as-is here.
            if data['cycles']:
                f.write('// Forward declarations for circular dependencies\n')
                for table_name in sorted(data['cycles']):
                    f.write(f'table {table_name};\n')
                f.write('\n')

            # Enums
            if data['enums']:
                f.write('// --- Enums ---\n')
                for definition in sorted(data['enums'], key=lambda x: x['name']):
                    f.write(f'enum {definition["name"]} : {definition["base"]} {{\n')
                    for field in definition['fields']:
                        f.write(f' {field},\n')
                    f.write('}\n\n')

            # Tables
            if data['tables']:
                f.write('// --- Tables ---\n')
                for definition in data['tables']:
                    f.write(f'table {definition["name"]} {{\n')

                    # Handle field naming conflicts: when two original names
                    # collapse to the same snake_case name, keep the
                    # original spelling for both.
                    snake_to_original = defaultdict(list)
                    for field in definition['fields']:
                        snake_to_original[pascal_to_snake_case(field['original'])].append(field['original'])

                    # Track if key attribute was added (at most one per table)
                    key_field_added = False

                    for field in definition['fields']:
                        snake_name = pascal_to_snake_case(field['original'])
                        field_name = (field['original'] if len(snake_to_original[snake_name]) > 1
                                      else snake_name)

                        is_array = field['type'].startswith('[')
                        base_type = field['type'].strip('[]')
                        final_type_str = field['type']

                        # Resolve type references; qualify with the namespace
                        # only when the target lives in a different one.
                        full_dep_name = find_full_type_name(base_type, definition['ns'], final_definitions)
                        if full_dep_name:
                            dep_data = final_definitions[full_dep_name]
                            simple_name = dep_data['name']

                            if dep_data['ns'] != definition['ns']:
                                final_type_str = f"{dep_data['ns']}.{simple_name}"
                            else:
                                final_type_str = simple_name

                            if is_array:
                                final_type_str = f"[{final_type_str}]"

                        # Add key attribute for primary key fields
                        key_suffix = ""
                        if (not key_field_added and
                                field_name.lower() in ['key', 'id'] and
                                not is_array):
                            key_suffix = " (key)"
                            key_field_added = True

                        f.write(f' {field_name}:{final_type_str}{key_suffix};\n')

                    f.write('}\n\n')

    print(f"Success! Schema with {len(final_definitions)} types saved to {output_file}")
|
|
|
|
def main():
    """Run the converter on the configured input and output paths.

    Prints an error and returns without doing anything else when
    ``INPUT_CSHARP_FILE`` does not exist; otherwise parses it and writes the
    schema to ``OUTPUT_FBS_FILE``.
    """
    if os.path.exists(INPUT_CSHARP_FILE):
        print("Starting C# parsing...")
        parsed = parse_csharp_file(INPUT_CSHARP_FILE)
        print(f"Parsed {len(parsed)} definitions. Generating .fbs schema...")
        generate_fbs_schema(parsed, OUTPUT_FBS_FILE)
    else:
        print(f"Error: Input file '{INPUT_CSHARP_FILE}' not found.")
|
|
|
|
# Run the converter only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()