Question
Find Source, Join, Subquery Tables from a PL/SQL code using python
I have a plsql code,
I want a python code which will help me to find source, join and sub query tables and subquery join tables. Kindly consider that the plsql code would get changed according to the requirement.
Output would be like,
{'source_table': ['schema_name.table1 (rfslt)'],
'join_tables': ['schema_name.table2 (b)', 'table3 (doogal)', 'tab_time (d)'],
'subquery_table': ['schema_name.table6 (e)']}
'subquery_join_table': ['schema_name.table7 (h)']}
{'source_tables': ['table1 (a)'],
'join_tables': ['table2 (b)', 'table3 (c)', 'table4 (d)']}
{'source_tables': ['table1 (a)'],
'join_tables': ['table2 (b)', 'table3 (c)']}
Below is the plsql code which have and from which I need to find the output mentioned above.
Insert into tabs
Select * from (Select * from
schema_name.table1 rfslt
LEFT OUTER JOIN schema_name.table2 b ON rfslt.key = b.key
LEFT OUTER JOIN table3 doogal ON REPLACE(rfslt.code_c, ' ', '') = doogal.code_derived
LEFT OUTER JOIN (select *
from schema_name.table6 e
left join schema_name.table7 h on h.id = e.id order by h.change_name) muk ON rfslt.id=muk.id
) prems
WHERE choose1 = 1
) a,
tab_time b
WHERE TRUNC(a.ok_date) = b.g_date(+);
Insert into tab
Select a.*
from table1 a
left join table2 b on (a.col1 = b.col1)
left join table3 c on (a.col2 = c.col2)
inner join table4 d on (b.col3 = d.col3)
Where a.col4 = 'TEST';
Insert into tab
Select case when a.col1 = 'text' then 'Next on top' end d_col1
from (Select * from table1 tbl where tbl.col0 = 'sample') a,
table2 b,
(Select * from table3 tbl3 where col0 in (select col0 from table4 order by col8) c
Where a.col1 = b.col1(+) and a.col2 = 'TEST' and a.col3 = c.col3;
I have tried the below code to acheive the requirement.
import re
def extract_join_tables(sql_code):
join_tables = []
# Regular expression to match explicit JOIN statements
join_pattern = re.compile(r'\b(?:LEFT|RIGHT|INNER|OUTER)?\s*JOIN\s+(?:\((?:[^()]|\((?:[^()]+|\([^()]*\))*\))*\)|[^\s(]+)\s+(?:\b(?:AS\s+)?(\w+)\b)?\s+ON', re.IGNORECASE)
# Regular expression to match simple table references in the FROM clause
from_pattern = re.compile(r'\bFROM\s+(?:\((?:[^()]|\((?:[^()]+|\([^()]*\))*\))*\)|[^\s(]+)\s+(?:\b(?:AS\s+)?(\w+)\b)?(?:,|$)', re.IGNORECASE)
# Find all JOIN statements
matches = join_pattern.findall(sql_code)
for match in matches:
if match[0]: # match[0] contains the table name, match[1] contains the alias (if any)
join_tables.append(match[0])
# Find all simple table references in the FROM clause
matches = from_pattern.findall(sql_code)
for match in matches:
if match[0]: # match[0] contains the table name, match[1] contains the alias (if any)
join_tables.append(match[0])
return join_tables
plsql_code = """
Insert into tabs
Select * from (Select * from
schema_name.table1 rfslt
LEFT OUTER JOIN schema_name.table2 b ON rfslt.key = b.key
LEFT OUTER JOIN table3 doogal ON REPLACE(rfslt.code_c, ' ', '') = doogal.code_derived
LEFT OUTER JOIN (select *
from schema_name.table6 e
left join schema_name.table7 h on h.id = e.id order by h.change_name) muk ON rfslt.id=muk.id
) prems
WHERE choose1 = 1
) a,
tab_time b
WHERE TRUNC(a.ok_date) = b.g_date(+);
"""
join_tables = extract_join_tables(plsql_code)
print("Join Tables and References:")
for table in join_tables:
print(table)