I’m querying review statuses for kepler to get a sense of who is doing all of the PR approvals and how long reviews are taking.
URL: https://api.github.com/graphql
Header: Authorization: Bearer TOKEN_GOES_HERE
For the token, create a GitHub personal access token at https://github.com/settings/tokens. I used the repo & user scopes.
query {
  search(query: "org:ORGHERE is:pr created:>2021-01-01", type: ISSUE, last: 50) {
    edges {
      node {
        ... on PullRequest {
          url
          title
          createdAt
          author {
            login
          }
          reviewDecision
          reviews(first: 100) {
            edges {
              node {
                author {
                  login
                }
                publishedAt
                state
              }
            }
          }
        }
      }
    }
  }
}
The same query in curl (swap in your own org and date window):
curl -v \
-H "Authorization: Bearer $TOKEN" \
-H 'content-type: application/json' \
-X POST \
--data '{"query": "query { search(query: \"org:ORG is:pr created:>2024-01-01\", type: ISSUE, last: 50) { edges { node { ... on PullRequest { url title createdAt author { login } reviewDecision reviews(first:100) { edges { node { author { login } publishedAt state}}}}}}}}"}' \
https://api.github.com/graphql > /tmp/data.json
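If you’d rather drive the whole thing from Python, here’s a minimal sketch of the same POST using the requests library, assuming (like the curl above) your token is in the TOKEN environment variable. Note that search returns at most 50 results per call; paginating with pageInfo { hasNextPage endCursor } is left out here.

import json
import os
import requests

QUERY = """
query {
  search(query: "org:ORG is:pr created:>2024-01-01", type: ISSUE, last: 50) {
    edges { node { ... on PullRequest {
      url title createdAt author { login } reviewDecision
      reviews(first: 100) { edges { node { author { login } publishedAt state } } }
    } } }
  }
}
"""

resp = requests.post(
    "https://api.github.com/graphql",
    json={"query": QUERY},
    headers={"Authorization": f"Bearer {os.environ['TOKEN']}"},
)
resp.raise_for_status()  # fail loudly on auth or rate-limit errors
with open('/tmp/data.json', 'w') as f:
    json.dump(resp.json(), f)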
Script to analyze the results:
from collections import defaultdict
from datetime import datetime, timedelta
import json
import statistics as stats

def toDatetime(s):
    return datetime.strptime(s, "%Y-%m-%dT%H:%M:%S%z")

def print_time(td):
    t = td.total_seconds()
    days = int(t / (60 * 60 * 24))
    t -= days * 60 * 60 * 24
    hours = int(t / (60 * 60))
    t -= hours * 60 * 60
    minutes = int(t / 60)
    out = []
    if days != 0:
        out.append('%sd' % days)
    if days != 0 or hours != 0:
        out.append('%sh' % hours)
    if days != 0 or hours != 0 or minutes != 0:
        out.append('%sm' % minutes)
    return ' '.join(out) or '<1m'  # sub-minute deltas would otherwise print as ''

reviewers = defaultdict(list)
oldestPr = None

with open('/tmp/data.json') as f:
    data = json.load(f)

for pr in data['data']['search']['edges']:
    created = toDatetime(pr['node']['createdAt'])
    if oldestPr is None:
        oldestPr = created
    oldestPr = min(oldestPr, created)
    # Gather every review each person left on this PR, as a delta from PR creation.
    oldestByPerson = defaultdict(list)
    for review in pr['node']['reviews']['edges']:
        if review['node']['author'] is None:  # deleted accounts come back as null
            continue
        person = review['node']['author']['login']
        reviewedAt = toDatetime(review['node']['publishedAt'])
        oldestByPerson[person].append(reviewedAt - created)
    for person, times in oldestByPerson.items():
        # if min(times).total_seconds() > 250000:
        #     print(f"WARNING: Something looks off. Should {pr['node']['title']} really have taken {min(times).total_seconds()} to review?")
        # Only the earliest review per person counts toward time-to-first-review.
        reviewers[person].append(min(x.total_seconds() for x in times))

print(f"Total data set is {len(data['data']['search']['edges'])} PRs since {oldestPr}")

sorted_reviewers = sorted(reviewers.items(), key=lambda x: -len(x[1]))

for reviewer, times in sorted_reviewers:
    if len(times) < 2:
        avg = 'N/A'
        stdev = 'N/A'
    else:
        avg = print_time(timedelta(seconds=stats.mean(times)))
        stdev = print_time(timedelta(seconds=stats.stdev(times)))
    print(f"{reviewer}: {len(times)} reviews. Time to first review: (avg: {avg}; stddev {stdev})")
And to determine the time between first approval and second approval:
from datetime import datetime, timedelta
import json

def toDatetime(s):
    return datetime.strptime(s, "%Y-%m-%dT%H:%M:%S%z")

def print_time(td):
    t = td.total_seconds()
    days = int(t / (60 * 60 * 24))
    t -= days * 60 * 60 * 24
    hours = int(t / (60 * 60))
    t -= hours * 60 * 60
    minutes = int(t / 60)
    out = []
    if days != 0:
        out.append('%sd' % days)
    if days != 0 or hours != 0:
        out.append('%sh' % hours)
    if days != 0 or hours != 0 or minutes != 0:
        out.append('%sm' % minutes)
    return ' '.join(out) or '<1m'

with open('/tmp/data.json') as f:
    data = json.load(f)

for pr in data['data']['search']['edges']:
    approvals = [x for x in pr['node']['reviews']['edges'] if x['node']['state'] == 'APPROVED']
    if len(approvals) < 2:
        continue
    # Reviews come back in chronological order, so [0] and [1] are the first two approvals.
    timeWaitingOnSecondApproval = toDatetime(approvals[1]['node']['publishedAt']) - \
        toDatetime(approvals[0]['node']['publishedAt'])
    print(f"Additional {print_time(timeWaitingOnSecondApproval):>8} to get the second approval for {pr['node']['title']}")