I have two storage accounts. I copy files into the source account, and from there the files are replicated to the destination account. I want to get the max, min, and average replication latency of the files.
Example:
In the source there are 3 files, which are replicated to the destination.
For each file, I want the difference (datediff) between its lastModified time in the source and in the destination, and from those differences the max, min, and avg latency.
The query I am using works for a very small amount of data and a short duration. When I try to run it for the last 3 days, it gives an error.
Note: in each copy iteration, almost 5 lakh (500,000) files are copied to the source.
What I have tried:
// URI prefix (endpoint + container) that is stripped from the start of each
// blob URI via trim_start(); presumably the source account's endpoint.
let Srcsubstring = "https://xxxxxxxxx.blob.core.windows.net:443/test/";
// Second URI prefix stripped the same way; presumably the destination
// account's endpoint.
let Destsubstring = "https://xxxxxxxxx.blob.core.windows.net:443/test/";
// Delimiter marking where the URI is cut off (start of the query string).
let substring2 = "?";
// Delimiter marking the end of the grouping prefix inside the trimmed URI.
let substring3 = "_";
// Row-level working set: one row per PutBlob/CopyBlob log record, with the
// blob URI reduced to a container-relative path and a grouping prefix.
//
// This is intentionally NOT wrapped in materialize(): the materialize cache
// is size-limited and a query that overflows it is aborted. With hundreds of
// thousands of blobs per copy iteration, several days of row-level records
// is the kind of result that should not be cached; only small, reused
// results are good materialize() candidates.
let filteredLogs =
    StorageBlobLogs
    | where AccountName in ("xxxxxxxxxx", "xxxxxxxxxx")
    | where OperationName in ("CopyBlob", "PutBlob")
    // Strip the account endpoint + container so the same blob has the same
    // Uri value in both accounts.
    // NOTE(review): trim_start() interprets its first argument as a regex, so
    // the "." characters in the endpoint match any character - confirm this
    // is acceptable or escape the pattern.
    | extend Uri = trim_start(Srcsubstring, Uri)
    | extend Uri = trim_start(Destsubstring, Uri)
    // Drop the query string (SAS token etc.). split() keeps the whole path
    // when there is no "?" at all, whereas substring(..., indexof(...)) is
    // handed a length of -1 in that case.
    | extend Uri = tostring(split(Uri, substring2)[0])
    // Grouping key: the part of the path before the first "_".
    // NOTE(review): indexof() returns -1 for paths without "_"; confirm every
    // blob name contains the delimiter.
    | extend prefix = substring(Uri, 0, indexof(Uri, substring3))
    | project Uri, LastModifiedTime, AccountName, OperationName, RequestBodySize, TimeGenerated, prefix;
// Per-prefix replication latency statistics (seconds).
//
// Step 1 pairs the source write (PutBlob) and the destination write
// (CopyBlob) of each blob by (prefix, Uri) and takes the difference of their
// LastModifiedTime values. Step 2 rolls those per-blob latencies up to
// max / min / avg per prefix.
//
// No materialize(): this result is consumed once, so caching it only adds
// pressure on the size-limited materialize cache.
let replicationLatency =
    filteredLogs
    // Uri is a very high-cardinality key (one group per blob); the shuffle
    // hint spreads the aggregation across nodes instead of building all
    // groups in one place. It does not change the result.
    | summarize hint.shufflekey = Uri
        Source_LastModifiedTime = maxif(LastModifiedTime, OperationName == "PutBlob"),
        Destination_LastModifiedTime = maxif(LastModifiedTime, OperationName == "CopyBlob")
        by prefix, Uri
    // Positive when the destination copy is newer than the source write.
    | extend ReplicationLatency = datetime_diff('second', Destination_LastModifiedTime, Source_LastModifiedTime)
    | summarize
        Max_ReplicationLatency = max(ReplicationLatency),
        Min_ReplicationLatency = min(ReplicationLatency),
        Avg_ReplicationLatency = avg(ReplicationLatency)
        by prefix;
// Per-prefix batch summary: time span of the log records, first/last
// LastModifiedTime on each side, transferred volume, file counts and the
// account name seen for each operation.
//
// No materialize(): this result is consumed once, so caching it only adds
// pressure on the size-limited materialize cache.
let logSummarization =
    filteredLogs
    | summarize
        TimeGenerated = min(TimeGenerated),
        Max_TimeGenerated = max(TimeGenerated),
        Min_LastModifiedTime = min(LastModifiedTime),
        Max_LastModifiedTime = max(LastModifiedTime),
        Source_MinLastModifiedTime = minif(LastModifiedTime, OperationName == "PutBlob"),
        Source_MaxLastModifiedTime = maxif(LastModifiedTime, OperationName == "PutBlob"),
        Destination_MinLastModifiedTime = minif(LastModifiedTime, OperationName == "CopyBlob"),
        Destination_MaxLastModifiedTime = maxif(LastModifiedTime, OperationName == "CopyBlob"),
        // Divide by a real literal first: integer / integer is integer
        // division in KQL, which would truncate the result to whole GiB and
        // report 0 for anything below 1 GiB.
        PutBlob_Ingress_Gb = sumif(RequestBodySize, OperationName == "PutBlob") / 1024.0 / 1024 / 1024,
        CopyBlob_Ingress_Gb = sumif(RequestBodySize, OperationName == "CopyBlob") / 1024.0 / 1024 / 1024,
        // dcountif() is an estimate of the distinct count, not an exact count.
        Source_FileCount = dcountif(Uri, OperationName == "PutBlob"),
        Destination_FileCount = dcountif(Uri, OperationName == "CopyBlob"),
        Src_AccountName = maxif(AccountName, OperationName == "PutBlob"),
        Dest_AccountName = maxif(AccountName, OperationName == "CopyBlob")
        by prefix;
// Final report: one row per prefix, combining the per-blob latency statistics
// with the per-prefix summary, most recent batch first.
replicationLatency
| join kind=inner (logSummarization) on prefix
// Derive the batch-level deltas first so the projection is a plain column list.
| extend
    StartOff_Latency = datetime_diff('second', Destination_MinLastModifiedTime, Source_MinLastModifiedTime),
    FileDifference = Source_FileCount - Destination_FileCount,
    Replication_Latency_seconds = datetime_diff('second', Destination_MaxLastModifiedTime, Source_MaxLastModifiedTime)
| project
    prefix,
    TimeGenerated,
    Src_AccountName,
    Dest_AccountName,
    Source_MinLastModifiedTime,
    Source_MaxLastModifiedTime,
    Destination_MinLastModifiedTime,
    Destination_MaxLastModifiedTime,
    StartOff_Latency,
    PutBlob_Ingress_Gb,
    CopyBlob_Ingress_Gb,
    Source_FileCount,
    Destination_FileCount,
    FileDifference,
    Replication_Latency_seconds,
    Max_ReplicationLatency,
    Min_ReplicationLatency,
    Avg_ReplicationLatency
| sort by TimeGenerated desc