From bfdd076eed9f2cf956cc3497b13ede082131c08e Mon Sep 17 00:00:00 2001 From: Matthieu Napoli Date: Fri, 9 Jan 2015 00:27:14 +1300 Subject: [PATCH] Github API queries optimization #16 --- app/config/config.php | 14 -- src/Maintained/GitHub/SearchPager.php | 143 ++++++++++++++++++ .../Statistics/StatisticsComputer.php | 116 +++++++------- 3 files changed, 199 insertions(+), 74 deletions(-) create mode 100644 src/Maintained/GitHub/SearchPager.php diff --git a/app/config/config.php b/app/config/config.php index f60e396..2ef8adc 100644 --- a/app/config/config.php +++ b/app/config/config.php @@ -31,18 +31,6 @@ return [ // GitHub API 'github.auth_token' => null, - 'issues.label_exclusions' => [ - '.*enhancement.*', - '.*feature.*', - '.*task.*', - '.*refactoring.*', - '.*duplicate.*', - '(.*[\s\.-])?wip', - '(.*[\s\.-])?rfc', - '(.*[\s\.-])?poc', - '(.*[\s\.-])?dx', - ], - StatisticsProvider::class => link(CachedStatisticsProvider::class), CachedStatisticsProvider::class => object() ->constructorParameter('cache', link('storage.statistics')) @@ -50,8 +38,6 @@ return [ StatisticsProviderLogger::class => object() ->constructorParameter('wrapped', link(StatisticsComputer::class)) ->constructorParameter('repositoryStorage', link('storage.repositories')), - StatisticsComputer::class => object() - ->constructorParameter('excludedLabels', link('issues.label_exclusions')), // CLI commands ClearCacheCommand::class => object() diff --git a/src/Maintained/GitHub/SearchPager.php b/src/Maintained/GitHub/SearchPager.php new file mode 100644 index 0000000..0b2d547 --- /dev/null +++ b/src/Maintained/GitHub/SearchPager.php @@ -0,0 +1,143 @@ + + */ +class SearchPager +{ + /** + * @var Client client + */ + protected $client; + + /** + * @var array pagination + * Comes from pagination headers in Github API results + */ + protected $pagination; + + /** + * The Github client to use for pagination. This must be the same + * instance that you got the Api instance from, i.e.: + * + * $client = new \Github\Client(); + * $api = $client->api('someApi'); + * $pager = new \Github\ResultPager($client); + * + * @param Client $client + * + */ + public function __construct(Client $client) + { + $this->client = $client; + } + + public function fetchAll($q, $sort = 'updated', $order = 'desc') + { + $api = $this->client->search(); + + // get the perPage from the api + $perPage = $api->getPerPage(); + + // Set parameters per_page to GitHub max to minimize number of requests + $api->setPerPage(100); + + $api->issues($q, $sort, $order); + $results = $api->issues($q, $sort, $order)['items']; + $this->postFetch(); + + while ($this->hasNext()) { + $results = array_merge($results, $this->fetchNext()['items']); + } + + // restore the perPage + $api->setPerPage($perPage); + + return $results; + } + + /** + * {@inheritdoc} + */ + public function postFetch() + { + $this->pagination = ResponseMediator::getPagination($this->client->getHttpClient()->getLastResponse()); + } + + /** + * {@inheritdoc} + */ + public function hasNext() + { + return $this->has('next'); + } + + /** + * {@inheritdoc} + */ + public function fetchNext() + { + return $this->get('next'); + } + + /** + * {@inheritdoc} + */ + public function hasPrevious() + { + return $this->has('prev'); + } + + /** + * {@inheritdoc} + */ + public function fetchPrevious() + { + return $this->get('prev'); + } + + /** + * {@inheritdoc} + */ + public function fetchFirst() + { + return $this->get('first'); + } + + /** + * {@inheritdoc} + */ + public function fetchLast() + { + return $this->get('last'); + } + + /** + * {@inheritdoc} + */ + protected function has($key) + { + return !empty($this->pagination) && isset($this->pagination[$key]); + } + + /** + * {@inheritdoc} + */ + protected function get($key) + { + if ($this->has($key)) { + $result = $this->client->getHttpClient()->get($this->pagination[$key]); + $this->postFetch(); + + return ResponseMediator::getContent($result); + } + } +} diff --git a/src/Maintained/Statistics/StatisticsComputer.php b/src/Maintained/Statistics/StatisticsComputer.php index 0db0236..92c941c 100644 --- a/src/Maintained/Statistics/StatisticsComputer.php +++ b/src/Maintained/Statistics/StatisticsComputer.php @@ -3,7 +3,7 @@ namespace Maintained\Statistics; use Github\Client; -use Github\ResultPager; +use Maintained\GitHub\SearchPager; use Maintained\Issue; use Maintained\TimeInterval; @@ -22,27 +22,47 @@ class StatisticsComputer implements StatisticsProvider /** * @var string[] */ - private $excludedLabels; + private $excludedLabels = [ + 'enhancement', + 'feature', + 'task', + 'refactoring', + 'duplicate', + ]; - public function __construct(Client $github, array $excludedLabels) + /** + * @var string[] + */ + private $excludedLabelRegexes = [ + '.*enhancement.*', + '.*feature.*', + '.*task.*', + '.*refactoring.*', + '.*duplicate.*', + '(.*[\s\.-])?wip', + '(.*[\s\.-])?rfc', + '(.*[\s\.-])?poc', + '(.*[\s\.-])?dx', + ]; + + public function __construct(Client $github) { $this->github = $github; - $this->excludedLabels = $excludedLabels; } public function getStatistics($user, $repository) { $issues = $this->fetchIssues($user, $repository); + + // Currently disabled because GitHub changed permissions: requires push access // $collaborators = $this->fetchCollaborators($user, $repository); - // $issues = $this->excludeIssuesCreatedByCollaborators($issues, $collaborators); - $issues = $this->excludeIssuesByLabels($issues, $this->excludedLabels); - $latestIssues = $this->keepLatestIssues($issues); + $issues = $this->filterIssuesByLabels($issues); $statistics = new Statistics(); - $statistics->resolutionTime = $this->computeResolutionTime($latestIssues); - $statistics->openIssuesRatio = $this->computeOpenIssueRatio($issues); + $statistics->resolutionTime = $this->computeResolutionTime($issues); + $statistics->openIssuesRatio = $this->computeOpenIssueRatio($user, $repository); return $statistics; } @@ -61,24 +81,20 @@ class StatisticsComputer implements StatisticsProvider } /** - * @param Issue[] $issues + * @param string $user + * @param string $repository * @return float */ - private function computeOpenIssueRatio(array $issues) + private function computeOpenIssueRatio($user, $repository) { - if (empty($issues)) { - return 0; - } + $results = $this->github->search()->issues("type:issue repo:$user/$repository state:open"); + $openCount = $results['total_count']; + $results = $this->github->search()->issues("type:issue repo:$user/$repository state:closed"); + $closedCount = $results['total_count']; - $openIssues = 0; + $total = $openCount + $closedCount; - foreach ($issues as $issue) { - if ($issue->isOpen()) { - $openIssues++; - } - } - - return $openIssues / count($issues); + return ($total !== 0) ? $openCount / $total : 0; } /** @@ -95,14 +111,13 @@ class StatisticsComputer implements StatisticsProvider /** * @param Issue[] $issues - * @param string[] $labels * @return Issue[] */ - private function excludeIssuesByLabels(array $issues, array $labels) + private function filterIssuesByLabels(array $issues) { - $regex = '/^(' . implode(')|(', $labels) . ')$/i'; + $regex = '/^(' . implode(')|(', $this->excludedLabelRegexes) . ')$/i'; - return array_filter($issues, function (Issue $issue) use ($labels, $regex) { + return array_filter($issues, function (Issue $issue) use ($regex) { foreach ($issue->getLabels() as $label) { $match = preg_match($regex, $label); @@ -116,22 +131,6 @@ class StatisticsComputer implements StatisticsProvider }); } - /** - * @param Issue[] $issues - * @return Issue[] - */ - private function keepLatestIssues(array $issues) - { - $sixMonthsAgo = new \DateTime('-6 month'); - - return array_filter($issues, function (Issue $issue) use ($sixMonthsAgo) { - if ($issue->isOpen()) { - return true; - } - return $issue->getOpenedAt() > $sixMonthsAgo; - }); - } - /** * @param float[] $array * @return float @@ -156,28 +155,25 @@ class StatisticsComputer implements StatisticsProvider return $array[$middleIndex]; } - /** - * @param string $user - * @param string $repository - * @return Issue[] - */ - private function fetchIssues($user, $repository) + public function fetchIssues($user, $repository) { - /** @var \GitHub\Api\Issue $issueApi */ - $issueApi = $this->github->api('issue'); + $sixMonthsAgo = new \DateTime('-6 month'); + $sixMonthsAgo = $sixMonthsAgo->format('Y-m-d'); - $paginator = new ResultPager($this->github); - $issues = $paginator->fetchAll($issueApi, 'all', [ - $user, - $repository, - ['state' => 'all'], - ]); + // Pre-filter with labels to fetch as little issues as possible + $excludedLabels = array_map(function ($label) { + return '-label:' . $label; + }, $this->excludedLabels); + $excludedLabels = implode(' ', $excludedLabels); - $issues = array_map(function (array $data) { + $query = "repo:$user/$repository type:issue created:>$sixMonthsAgo $excludedLabels"; + + $paginator = new SearchPager($this->github); + $results = $paginator->fetchAll($query); + + return array_map(function (array $data) { return Issue::fromArray($data); - }, $issues); - - return $issues; + }, $results); } /**