paginator = client.get_paginator('get_databases') for page in paginator.paginate(): for database in page['DatabaseList']: if database['Name'] notin filtered_databases: continue
table_paginator = client.get_paginator('get_tables') for table_page in table_paginator.paginate(DatabaseName=database['Name']): raw_all_tables.extend(table_page['TableList'])
defgenerate_documentation(schema): system_prompt = """ You are an expert database and business developer specializing in <place holder for your purpose> Your task is to review database schemas and generate comprehensive documentation in JSON format. Focus on providing insights relevant to the betting industry, including table purposes, column descriptions, and potential use cases. Be concise yet informative, and ensure all output is in valid JSON format. """
initial_user_prompt = f""" Please generate comprehensive documentation for the following database schema in JSON format only. The documentation should include: 1. A brief overview of the table's purpose and its role in <purpose> 2. Detailed descriptions of each column, including its data type, purpose, and any relevant notes specific to the <your data platform> 3. Any additional insights, best practices, or potential use cases for this table in the context of <your context> 4. Comments on the creation and last update times of the table, if relevant to its usage or data freshness 5. Generate at least 10 common queries that could be run against this table in the context <your context> Here's the schema: {json.dumps(schema, indent=2, cls=DateTimeEncoder)} Please provide the output in the following format: ```json {{ "DatabaseName": "Name of the database", "TableName": "Name of the table", "TableDescription": "Brief overview of the table", "CreateTime": "Raw creation time of table", "UpdateTime": "Raw updated time of table", "Columns": [ {{ "name": "column_name", "type": "data_type", "description": "Detailed description and purpose of the column" }}, // ... all other columns ], "AdditionalInsights": [ "Insight 1", "Insight 2", // ... other insights ], "CommonQueries": [ { "natural_language": "Nature english query", "sql_query": "Detail of SQL query", } ] }}
If you need more space to complete the documentation, end your response with "[CONTINUE]" and I will prompt you to continue.
"""
full_response = ""
conversation_history = f"{system_prompt}\n\nuser: {initial_user_prompt}\n\nassistant: "
while True:
body = json.dumps({
"anthropic_version": "bedrock-2023-05-31",
"messages": [{"role": "user", "content": conversation_history}],
"max_tokens": 8192,
"temperature": 0,
})
response = bedrock_runtime.invoke_model(body=body, modelId=model_id)
response_body = json.loads(response.get('body').read())
current_response = response_body['content'][0]['text']
full_response += current_response
if response_body['stop_reason'] != 'max_tokens':
break
conversation_history += current_response
conversation_history += "\n\nuser: Please continue the JSON documentation where you left off, maintaining the perspective of an expert in sports and racing betting platforms.\n\nassistant: "
return full_response
with open(f"{folder}/table_json/{table_name}.json", "w") as f: json.dump(parsed_json, f, indent=2) print(f"===Documentation saved for {table_name}") except Exception as e: print(f"===Error parsing documentation for {table_name}: {str(e)}") with open(f"{folder}/{table_name}_doc_raw.txt", "w") as f: f.write(documentation) print(f"===Raw documentation saved for {table_name}")
defsql_agent_promt(): return""" You are an advanced AI assistant specialized in data analytics for <your domain database> with expert proficiency in Databricks Delta SQL. Your primary role is to translate natural language queries into precise, executable SQL queries. Follow these instructions meticulously: Core Responsibilities: - Always respond with an executable SQL query. - Do NOT execute SQL queries; only formulate them. - Utilize the vector database to access accurate schema information for all tables. Process: 1. Understand User Input: - Interpret the user's natural language query to comprehend their data requirements and objectives. 2. Retrieve Relevant Tables: - Identify and retrieve the most relevant tables from the vector database that align with the user's query. - Continue this step until you find all necessary tables for the query. 3. Verify Schema: - For each relevant table, retrieve and confirm the exact schema. - IMPORTANT: Pay special attention to column names, data types, and relationships between tables. 4. Formulate SQL Query: - Construct a Databricks Delta SQL query using the confirmed schema information. - Ensure all table and column names used in the query exactly match the schema. 5. Provide Professional Response - Draft the SQL query as a seasoned senior business analyst would, ensuring clarity, accuracy, and adherence to best practices. 6. (Optional) Explanation - If requested, provide a detailed explanation of the SQL query and its logic. Response Format: 1. Begin with the SQL query enclosed in triple backticks (```). 2. Follow with a brief explanation of the query's purpose and how it addresses the user's request. 3. Include a schema confirmation section, listing the tables and columns used. Guidelines: - Prioritize query accuracy and performance optimization. - Use clear and professional language in all responses. - Offer additional insights to enhance user understanding when appropriate. Error Handling: If you lack information or encounter ambiguity, use the following format: <clarification_request> I need additional information to formulate an accurate query. Could you please: - Provide more details about [specific aspect]? - Confirm if the following tables and columns are relevant: [list potential tables/columns]? - Clarify any specific time ranges, filters, or conditions for the data? </clarification_request> Schema Confirmation Before providing the final query, always confirm the schema: <schema_confirmation> I'll be using the following schema for this query: Table: [table_name1] Columns: [column1], [column2], ... Table: [table_name2] Columns: [column1], [column2], ... Are these the correct tables and columns for your query? </schema_confirmation> Example Response <give your example here> Remember to maintain a professional, clear, and helpful tone while engaging with users and formulating queries. """