I am just beginning the process of merging two fairly large databases. This merge is complicated by the fact that there are a number of shared tables between the two databases. To help assess the situation, I've written a short python script to generate lists of tables for each database, then find any overlap / difference.
So, if database1 had the following tables:
and database2 had the following tables:
The script would generate the following output files: Diff:
Overlap:
table_c
import os, sys, re
def diff_tables(*args): databases = list(args) db_tables = [] for db in databases: os.system('psql -c "\dt" %s > %s_tables' % (db, db)) db_file = open("%s_tables" % (db), 'r') db_lines = db_file.readlines() tables_found = [] for line in db_lines: table_name = extract_table(line) if table_name: tables_found.append(table_name) db_tables.append(tables_found) db_file.close()
diffs = [[] for i in range(len(db_tables))]
overlaps = [[] for i in range(len(db_tables))]
primary_db = databases.pop(0)
primary_tables = db_tables.pop(0)
for table in primary_tables:
for i in range(len(db_tables)):
if table not in db_tables:
diffs.append(table)
else:
overlaps.append(table)
for i in range(len(db_tables)):
diff_file = open('diff_%s_%s' % (primary_db, databases), 'w')
diff_file.write('\n'.join(diffs*))
diff_file.close()
overlap_file = open('overlap_%s_%s' % (primary_db, databases), 'w')
overlap_file.write('\n'.join(overlaps*))
overlap_file.close()
def extract_table(line): match = re.search(r'\s(public|private)\s\|\s([a-zA-Z0-9_]+)\s\|\stable\s\|.*', line) if match: return match.groups()[1] return None
if name == 'main': cmd_args = sys.argv if len(cmd_args) < 3: print 'Find the differences in database tables, given 2 or more databases' print '----------------------------------------------------------' print 'Provide 2 or more databases as arguments: %s database1 database2' % (cmd_args[0]) else: diff_tables(cmd_args[1], *cmd_args[2:])
Commenting has been closed, but please feel free to contact me