Dyota's blog

Azure Synapse: git repo rip out SQL scripts

The develop section of an Azure Synapse workspace can be connected to a git repository. This is a good way of keeping track of commit history, and also the ability to pull down all of the code down to a local source as text files. I like to do this because, while the online editor is pretty good, it's a lot nicer to be able to use my own tooling (e.g. Azure Data Studio).

The thing was want to get at are the SQL scripts. However, in the git repo, the SQL scripts are packaged up as text values inside of JSON objects.

It looks something like this:

{
  "name": "<script_name>",
  "properties": {
    "content": {
      "query": "<script_code>",
      "metadata": { "language": "sql" },
      "currentConnection": { "databaseName": "<database_name>", "poolName": "Built-in" },
      "resultLimit": 5000
    },
    "type": "SqlQuery"
  }
}

The actual query is stored inside .properties.content.query, with escape characters in it.

This PowerShell script goes into each .json file and extracts the SQL files nicely.

function pullRepo ($repo, $dest) {
    $currentDir = (Get-Location).path
    
    if (!(Test-Path $dest)) { New-Item -Itemtype Directory $dest }
    
    Set-Location $root
    
    # restore the local repo to the way it is in the remote repo
    Set-Location $repo
    git pull
    git reset --hard origin
    
    # rip out all of the SQL queries into their own files
    Get-ChildItem "$repo\sqlscript" |
        ForEach-Object {
            $basename = $_.BaseName
            
            $folder = $basename.split('_')[0]
            
            $one = Get-Content $_.FullName | convertfrom-json
            
            $path = "$dest\$folder"

            if (!(Test-Path $path)) { New-Item -Itemtype Directory $path }

            $one.properties.content.query > "$path\$basename.sql"
        }
    Set-Location $currentDir
}

function getSelect ($source, $dest) {
    # this function goes through all of the SQL scripts in Core and Reporting
    # and pulls out just the SELECT statement (takes out the CREATE OR ALTER VIEW at the start and GO at the end)

    Get-ChildItem $source -Recurse -File | 
        ForEach-Object {
            $folder = ($_.DirectoryName).Split("\")[-1]

            # take out the whitespace before and after
            # then replace all linebreaks with \n so that it's all in one line 
            # so that we can do regex on it
            $text = ((Get-Content $_) -join "`n").Trim().Replace("`n", "\n")

            # check if it has the word GO at the end 
            # and set up capture groups accordingly
            $regexgo = "(?<=.*)GO$"
            $hasgo = ($text | Select-String  $regexgo -AllMatches).Matches.Success

            $regex = if ($hasgo) { "(?<=CREATE OR ALTER VIEW.*AS\\n).*(?=GO)" }
                else { "(?<=CREATE OR ALTER VIEW.*AS\\n).*" }

            if (!(Test-Path $dest)) { New-Item -ItemType Directory $dest }

            $selectStatement = ($text | Select-String  $regex -AllMatches).Matches.Value

            $selectStatement.Trim().Replace("\n", "`n") > "$dest\$($_.BaseName).sql"

        }
}

To package it all p again, these functions do that:

function createCreate ($source, $dest) {
    # compile into CREATE OR ALTER scripts again
    $selectScripts = dir $source -Recurse -File
    
    $selectScripts |
        % {
            $folder = ($_.DirectoryName).Split("\")[-1]
    
            $filename = $_.BaseName
    
            $parts = $filename.split("_")
            $db = $filename.split("_")[0]
            $table = $filename.split("_")[1..20] -join "_"
    
            $create = "CREATE OR ALTER VIEW [$db].[$table]`n`tAS"
    
            $text = (cat $_) -join "`n"
    
            $text = "$create`n$text"
    
            $text > "$dest\$folder\$filename.sql"
    
        }
}


function packageJson ($source, $repo) {
    # package it all up into the .json files for the repo
    
    # take the edited files, and write them back in, ready for git push
    dir "$source" -r -File |
        # select -first 1 |
        % {
            # find the corresponding json file
            $basename = $_.basename
    
            # get the contents
            $content = (cat $_) -join "`n"
    
            $target = "$repo\sqlscript\$basename.json"
    
            # get the json object it will be replacing
            $json = cat $target | convertfrom-json
    
            # make the replacement int the right place
            $json.properties.content.query = $content
    
            # write everything back into the file again
            $json | convertto-json -depth 100 > $target
        }
}

#git #powershell #sql #synapse