Initial commit
This commit is contained in:
240
skills/fabric-cli/scripts/create_direct_lake_model.py
Normal file
240
skills/fabric-cli/scripts/create_direct_lake_model.py
Normal file
@@ -0,0 +1,240 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Create a Direct Lake semantic model from lakehouse tables.
|
||||
|
||||
Usage:
|
||||
python3 create_direct_lake_model.py "src.Workspace/LH.Lakehouse" "dest.Workspace/Model.SemanticModel" -t schema.table
|
||||
|
||||
Requirements:
|
||||
- fab CLI installed and authenticated
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
import uuid
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def run_fab(args: list[str]) -> str:
    """Run a fab CLI command and return its stripped stdout.

    Args:
        args: Arguments passed to the ``fab`` executable.

    Returns:
        Stdout of the command with surrounding whitespace stripped.

    Exits:
        Terminates the script with the fab exit code when the command fails.
        (The original printed the error but still returned stdout, so callers
        such as get_lakehouse_sql_endpoint would then fail inside json.loads
        on an empty string with a confusing decode error.)
    """
    result = subprocess.run(["fab"] + args, capture_output=True, text=True)
    if result.returncode != 0:
        print(f"fab error: {result.stderr}", file=sys.stderr)
        # Fail fast instead of handing partial/empty output to callers.
        sys.exit(result.returncode)
    return result.stdout.strip()
|
||||
|
||||
|
||||
def get_lakehouse_sql_endpoint(workspace: str, lakehouse: str) -> dict:
    """Return the lakehouse's SQL endpoint properties as a dict.

    Queries `fab get <workspace>/<lakehouse> -q properties.sqlEndpointProperties`
    and parses the JSON response.
    """
    item_path = "/".join((workspace, lakehouse))
    raw = run_fab(["get", item_path, "-q", "properties.sqlEndpointProperties"])
    return json.loads(raw)
|
||||
|
||||
|
||||
def get_table_schema(workspace: str, lakehouse: str, schema: str, table: str) -> list:
    """Fetch a lakehouse table's columns by parsing `fab table schema` text output.

    The CLI prints a text table of the form:

        name        type
        ------------------------------------------
        col_name    col_type

    Returns:
        A list of ``{"name": ..., "type": ...}`` dicts, one per column.
    """
    table_path = f"{workspace}/{lakehouse}/Tables/{schema}/{table}"
    raw = run_fab(["table", "schema", table_path])

    columns = []
    past_header = False
    for raw_line in raw.split("\n"):
        stripped = raw_line.strip()
        if not past_header:
            # Everything up to (and including) the dashed separator is header.
            past_header = stripped.startswith("---")
            continue
        fields = stripped.split()
        # Blank lines and any stray separator rows have < 2 fields and are skipped.
        if len(fields) >= 2:
            columns.append({"name": fields[0], "type": fields[1]})
    return columns
|
||||
|
||||
|
||||
def tmdl_data_type(sql_type: str) -> str:
    """Map a SQL column type name to the corresponding TMDL data type.

    Matching is by substring on the lowercased type name, so parameterized
    forms like ``decimal(10,2)`` are handled. Date/time and boolean types are
    checked before the ``int`` substring so they cannot be shadowed by it.
    Unrecognized types fall back to ``string``.

    Fixes: SQL ``real`` and ``numeric`` previously fell through to ``string``;
    they are floating/decimal types and now map to ``double``.
    """
    sql_type = sql_type.lower()
    if 'date' in sql_type or 'time' in sql_type:
        return 'dateTime'
    if 'bool' in sql_type or 'bit' in sql_type:
        return 'boolean'
    if 'int' in sql_type:
        return 'int64'
    if any(t in sql_type for t in ('float', 'double', 'decimal', 'real', 'numeric')):
        return 'double'
    return 'string'
|
||||
|
||||
|
||||
def create_model_tmdl(model_name: str, table_name: str) -> str:
    """Build model.tmdl content for a single-table semantic model."""
    lines = [
        f"model '{model_name}'",
        "\tculture: en-US",
        "\tdefaultPowerBIDataSourceVersion: powerBI_V3",
        "",
        f"ref table '{table_name}'",
        "",  # trailing newline
    ]
    return "\n".join(lines)
|
||||
|
||||
|
||||
def create_expressions_tmdl(connection_string: str, endpoint_id: str) -> str:
|
||||
"""Create expressions.tmdl content."""
|
||||
return f"""expression DatabaseQuery =
|
||||
\t\tlet
|
||||
\t\t\tdatabase = Sql.Database("{connection_string}", "{endpoint_id}")
|
||||
\t\tin
|
||||
\t\t\tdatabase
|
||||
\tlineageTag: {uuid.uuid4()}
|
||||
"""
|
||||
|
||||
|
||||
def create_table_tmdl(table_name: str, schema_name: str, columns: list) -> str:
    """Build table.tmdl content: table header, one block per column, and a
    Direct Lake entity partition bound to the DatabaseQuery expression.

    Args:
        table_name: Table name as it appears in the model.
        schema_name: Lakehouse schema containing the table.
        columns: ``{"name": ..., "type": ...}`` dicts (SQL type names).
    """
    out = [
        f"table '{table_name}'",
        f"\tlineageTag: {uuid.uuid4()}",
        f"\tsourceLineageTag: [{schema_name}].[{table_name}]",
        "",
    ]

    # One block per physical column, mirroring the lakehouse schema.
    for column in columns:
        name = column['name']
        out += [
            f"\tcolumn '{name}'",
            f"\t\tdataType: {tmdl_data_type(column['type'])}",
            f"\t\tlineageTag: {uuid.uuid4()}",
            f"\t\tsourceLineageTag: {name}",
            "\t\tsummarizeBy: none",
            f"\t\tsourceColumn: {name}",
            "",
            "\t\tannotation SummarizationSetBy = Automatic",
            "",
        ]

    # Direct Lake partition pointing at the shared DatabaseQuery expression.
    out += [
        f"\tpartition '{table_name}' = entity",
        "\t\tmode: directLake",
        "\t\tsource",
        f"\t\t\tentityName: {table_name}",
        f"\t\t\tschemaName: {schema_name}",
        "\t\t\texpressionSource: DatabaseQuery",
        "",
    ]

    return "\n".join(out)
|
||||
|
||||
|
||||
def create_database_tmdl() -> str:
    """Build database.tmdl content; the database name is a fresh GUID."""
    return "database '%s'\n" % uuid.uuid4()
|
||||
|
||||
|
||||
def create_pbism() -> str:
    """Build definition.pbism content (semantic model definition properties)."""
    payload = {
        "$schema": "https://developer.microsoft.com/json-schemas/fabric/item/semanticModel/definitionProperties/1.0.0/schema.json",
        "version": "4.0",
        "settings": {},
    }
    return json.dumps(payload, indent=2)
|
||||
|
||||
|
||||
def create_platform(model_name: str) -> str:
    """Build .platform content (Fabric git-integration metadata).

    A fresh logicalId GUID is generated on every call.
    """
    payload = {
        "$schema": "https://developer.microsoft.com/json-schemas/fabric/gitIntegration/platformProperties/2.0.0/schema.json",
        "metadata": {
            "type": "SemanticModel",
            "displayName": model_name,
        },
        "config": {
            "version": "2.0",
            "logicalId": str(uuid.uuid4()),
        },
    }
    return json.dumps(payload, indent=2)
|
||||
|
||||
|
||||
def main():
    """Create a Direct Lake semantic model from a lakehouse table.

    Steps:
      1. Parse and validate CLI arguments.
      2. Resolve the source lakehouse's SQL endpoint (connection string + id).
      3. Read the table's column schema.
      4. Write a TMDL model definition into a temporary directory.
      5. Import the definition into the destination workspace via `fab import`.
    """
    parser = argparse.ArgumentParser(description="Create Direct Lake semantic model")
    parser.add_argument("source", help="Source: Workspace.Workspace/Lakehouse.Lakehouse")
    parser.add_argument("dest", help="Destination: Workspace.Workspace/Model.SemanticModel")
    parser.add_argument("-t", "--table", required=True, help="Table: schema.table_name")
    args = parser.parse_args()

    # Validate argument shapes up front so malformed paths produce a clear
    # usage error instead of an IndexError traceback.
    src_parts = args.source.split("/")
    if len(src_parts) < 2:
        parser.error("source must look like Workspace.Workspace/Lakehouse.Lakehouse")
    src_workspace = src_parts[0]
    # Normalize so the item carries exactly one ".Lakehouse" suffix
    # (removesuffix, unlike replace, only strips a trailing occurrence).
    src_lakehouse = src_parts[1].removesuffix(".Lakehouse") + ".Lakehouse"

    dest_parts = args.dest.split("/")
    if len(dest_parts) < 2:
        parser.error("dest must look like Workspace.Workspace/Model.SemanticModel")
    dest_workspace = dest_parts[0]
    model_name = dest_parts[1].removesuffix(".SemanticModel")

    table_parts = args.table.split(".")
    if len(table_parts) < 2:
        parser.error("table must look like schema.table_name")
    schema_name = table_parts[0]
    table_name = table_parts[1]

    print(f"Source: {src_workspace}/{src_lakehouse}")
    print(f"Table: {schema_name}.{table_name}")
    print(f"Dest: {dest_workspace}/{model_name}.SemanticModel")

    # Get SQL endpoint (needed by the DatabaseQuery expression).
    print("\nGetting SQL endpoint...")
    endpoint = get_lakehouse_sql_endpoint(src_workspace, src_lakehouse)
    print(f"  Connection: {endpoint['connectionString']}")
    print(f"  ID: {endpoint['id']}")

    # Get table schema to generate one TMDL column per lakehouse column.
    print(f"\nGetting table schema for {schema_name}.{table_name}...")
    columns = get_table_schema(src_workspace, src_lakehouse, schema_name, table_name)
    print(f"  Found {len(columns)} columns")

    # Build the full item definition in a temp directory, then import it.
    with tempfile.TemporaryDirectory() as tmpdir:
        model_dir = Path(tmpdir) / f"{model_name}.SemanticModel"
        def_dir = model_dir / "definition"
        tables_dir = def_dir / "tables"

        tables_dir.mkdir(parents=True)  # creates model_dir and def_dir too

        # Write files
        print("\nCreating TMDL files...")

        (model_dir / ".platform").write_text(create_platform(model_name))
        (model_dir / "definition.pbism").write_text(create_pbism())
        (def_dir / "model.tmdl").write_text(create_model_tmdl(model_name, table_name))
        (def_dir / "database.tmdl").write_text(create_database_tmdl())
        (def_dir / "expressions.tmdl").write_text(
            create_expressions_tmdl(endpoint['connectionString'], endpoint['id'])
        )
        (tables_dir / f"{table_name}.tmdl").write_text(
            create_table_tmdl(table_name, schema_name, columns)
        )

        print(f"  Created: {model_dir}")
        for f in model_dir.rglob("*"):
            if f.is_file():
                print(f"    {f.relative_to(model_dir)}")

        # Import to Fabric (-f forces overwrite of an existing model).
        print(f"\nImporting to {dest_workspace}...")
        dest_path = f"{dest_workspace}/{model_name}.SemanticModel"
        result = run_fab(["import", dest_path, "-i", str(model_dir), "-f"])
        print(result)

    print("\nDone!")
|
||||
|
||||
|
||||
# Script entry point: run only when executed directly, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user