I’m querying review statuses for kepler to get a sense of who is doing all of the PR approvals and how long reviews are taking.
URL: https://api.github.com/graphql
Header: Authorization: Bearer TOKEN_GOES_HERE
For the token, create a GitHub personal access token at https://github.com/settings/tokens. I used the repo & user scopes.
query {
  search(query: "org:ORGHERE is:pr created:>2021-01-01", type: ISSUE, last: 50) {
    edges {
      node {
        ... on PullRequest {
          url
          title
          createdAt
          author {
            login
          }
          reviewDecision
          reviews(first: 100) {
            edges {
              node {
                author {
                  login
                }
                publishedAt
                state
              }
            }
          }
        }
      }
    }
  }
}
The same query in curl (swap in your own org and date window):
curl -v \
-H "Authorization: Bearer $TOKEN" \
-H 'content-type: application/json' \
-X POST \
--data '{"query": "query { search(query: \"org:ORG is:pr created:>2024-01-01\", type: ISSUE, last: 50) { edges { node { ... on PullRequest { url title createdAt author { login } reviewDecision reviews(first:100) { edges { node { author { login } publishedAt state}}}}}}}}"}' \
https://api.github.com/graphql > /tmp/data.json
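If you’d rather drive the whole thing from Python, here’s a minimal sketch of the same POST using the requests library, assuming (like the curl above) your token is in the TOKEN environment variable. Note that search returns at most 50 results per call; paginating with pageInfo { hasNextPage endCursor } is left out here.

import json
import os
import requests

QUERY = """
query {
  search(query: "org:ORG is:pr created:>2024-01-01", type: ISSUE, last: 50) {
    edges { node { ... on PullRequest {
      url title createdAt author { login } reviewDecision
      reviews(first: 100) { edges { node { author { login } publishedAt state } } }
    } } }
  }
}
"""

resp = requests.post(
    "https://api.github.com/graphql",
    json={"query": QUERY},
    headers={"Authorization": f"Bearer {os.environ['TOKEN']}"},
)
resp.raise_for_status()  # fail loudly on auth or rate-limit errors
with open('/tmp/data.json', 'w') as f:
    json.dump(resp.json(), f)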
Script to analyze the results:
from collections import defaultdict
from datetime import datetime, timedelta
import json
import statistics as stats

def toDatetime(s):
    return datetime.strptime(s, "%Y-%m-%dT%H:%M:%S%z")

def print_time(td):
    t = td.total_seconds()
    days = int(t / (60 * 60 * 24))
    t -= days * 60 * 60 * 24
    hours = int(t / (60 * 60))
    t -= hours * 60 * 60
    minutes = int(t / 60)
    out = []
    if days != 0:
        out.append('%sd' % days)
    if days != 0 or hours != 0:
        out.append('%sh' % hours)
    if days != 0 or hours != 0 or minutes != 0:
        out.append('%sm' % minutes)
    return ' '.join(out) or '<1m'  # sub-minute deltas would otherwise print as ''

reviewers = defaultdict(list)
oldestPr = None

with open('/tmp/data.json') as f:
    data = json.load(f)

for pr in data['data']['search']['edges']:
    created = toDatetime(pr['node']['createdAt'])
    if oldestPr is None:
        oldestPr = created
    oldestPr = min(oldestPr, created)
    # Gather every review each person left on this PR, as a delta from PR creation.
    oldestByPerson = defaultdict(list)
    for review in pr['node']['reviews']['edges']:
        if review['node']['author'] is None:  # deleted accounts come back as null
            continue
        person = review['node']['author']['login']
        reviewedAt = toDatetime(review['node']['publishedAt'])
        oldestByPerson[person].append(reviewedAt - created)
    for person, times in oldestByPerson.items():
        # if min(times).total_seconds() > 250000:
        #     print(f"WARNING: Something looks off. Should {pr['node']['title']} really have taken {min(times).total_seconds()} to review?")
        # Only the earliest review per person counts toward time-to-first-review.
        reviewers[person].append(min(x.total_seconds() for x in times))

print(f"Total data set is {len(data['data']['search']['edges'])} PRs since {oldestPr}")

sorted_reviewers = sorted(reviewers.items(), key=lambda x: -len(x[1]))

for reviewer, times in sorted_reviewers:
    if len(times) < 2:
        avg = 'N/A'
        stdev = 'N/A'
    else:
        avg = print_time(timedelta(seconds=stats.mean(times)))
        stdev = print_time(timedelta(seconds=stats.stdev(times)))
    print(f"{reviewer}: {len(times)} reviews. Time to first review: (avg: {avg}; stddev {stdev})")
And to determine the time between first approval and second approval:
from datetime import datetime, timedelta
import json

def toDatetime(s):
    return datetime.strptime(s, "%Y-%m-%dT%H:%M:%S%z")

def print_time(td):
    t = td.total_seconds()
    days = int(t / (60 * 60 * 24))
    t -= days * 60 * 60 * 24
    hours = int(t / (60 * 60))
    t -= hours * 60 * 60
    minutes = int(t / 60)
    out = []
    if days != 0:
        out.append('%sd' % days)
    if days != 0 or hours != 0:
        out.append('%sh' % hours)
    if days != 0 or hours != 0 or minutes != 0:
        out.append('%sm' % minutes)
    return ' '.join(out) or '<1m'

with open('/tmp/data.json') as f:
    data = json.load(f)

for pr in data['data']['search']['edges']:
    approvals = [x for x in pr['node']['reviews']['edges'] if x['node']['state'] == 'APPROVED']
    if len(approvals) < 2:
        continue
    # Reviews come back in chronological order, so [0] and [1] are the first two approvals.
    timeWaitingOnSecondApproval = toDatetime(approvals[1]['node']['publishedAt']) - \
        toDatetime(approvals[0]['node']['publishedAt'])
    print(f"Additional {print_time(timeWaitingOnSecondApproval):>8} to get the second approval for {pr['node']['title']}")