Merge branch 'results'
This commit is contained in:
15
results/fetch.sh
Executable file
15
results/fetch.sh
Executable file
@@ -0,0 +1,15 @@
|
||||
#!/bin/bash
#
# Download the VEC 2024 council election results index, then every council
# results page linked from it. Everything is saved under ./html, relative to
# the current working directory.
#
# Exits non-zero if the index or any council page could not be downloaded.

base_url="https://www.vec.vic.gov.au"
list_file="html/lga_list.html"

mkdir -p html || exit 1

# Without the index there is nothing to iterate over, so stop on failure.
if ! wget "${base_url}/results/2024-council-election-results" -O "$list_file"; then
  echo "Error: failed to download the list of councils." >&2
  exit 1
fi

status=0

# grep emits one line of HTML per council results link. Reading the lines
# verbatim keeps word splitting and glob expansion away from the markup.
while IFS= read -r line; do
  # Keep what follows the last href=" on the line, up to the first ">.
  path=${line##*href=\"}
  path=${path%%\">*}
  url="${base_url}${path}"

  # Output name: drop everything up to the last "elections/", then remove
  # the first "/results".
  file=${url##*elections/}
  file=${file/\/results/}

  if ! wget "$url" -O "html/${file}"; then
    echo "Warning: failed to download ${url}" >&2
    status=1
  fi
done < <(grep 'href="/voting/.*/results"' "$list_file")

exit "$status"
|
||||
142
results/gen-elected.php
Normal file
142
results/gen-elected.php
Normal file
@@ -0,0 +1,142 @@
|
||||
<?php
|
||||
|
||||
/*
 * Command-line options (both required):
 *   --candidates-files  candidate CSV paths, passed as a single value
 *   --results-file      path to the JSON results file
 */
$options = getopt("", ["candidates-files:", "results-file:"]);

if (!isset($options['candidates-files'])) {
    error_log("Error: Missing required option '--candidates-files'.");
    exit(1);
}
$candidates_files = $options['candidates-files'];

if (!isset($options['results-file'])) {
    error_log("Error: Missing required option '--results-file'.");
    exit(1);
}
$results_file = $options['results-file'];

$results_string = file_get_contents($results_file);
if ($results_string === FALSE) {
    error_log("Error opening results.json.");
    exit(1);
}

/* Reject malformed JSON here: the rest of the script iterates and indexes
 * $results as an array. */
$results = json_decode($results_string, true);
if (!is_array($results)) {
    error_log("Error: results file '$results_file' does not contain valid JSON data.");
    exit(1);
}
|
||||
|
||||
/*
 * Turn a name into a slug: surrounding whitespace is trimmed, every space
 * becomes a hyphen, and the result is lower-cased.
 */
function trim_sluggify($input) {
    $trimmed = trim($input);
    $hyphenated = str_replace(' ', '-', $trimmed);
    return strtolower($hyphenated);
}
|
||||
|
||||
/*
 * Match database names to VEC names.
 *
 * Both strings are lower-cased and split on every character that is not a-z.
 * Each word of $words is compared with each word of a list entry using
 * similar_text(): a pair scoring above 70 percent adds its percentage to the
 * entry's total, any other pair subtracts 1.
 *
 * $words  name to look up
 * $list   array of names to search
 *
 * Returns array(score, name) for the highest-scoring entry (the first one
 * wins a tie), or array(0, "no match") when no entry scores above 0.
 */
function match_words($words, $list) {
    $max_score = 0;
    $best_match = "no match";

    /* The words being looked up are the same for every entry: split once. */
    $aa = preg_split("/[^a-z]/", strtolower($words));

    foreach ($list as $possible_match) {
        $bb = preg_split("/[^a-z]/", strtolower($possible_match));

        $score_sum = 0;
        foreach ($aa as $a) {
            foreach ($bb as $b) {
                similar_text($a, $b, $score);
                if ($score > 70) {
                    $score_sum += $score;
                } else {
                    $score_sum -= 1;
                }
            }
        }

        if ($score_sum > $max_score) {
            $max_score = $score_sum;
            $best_match = $possible_match;
        }
    }

    return array($max_score, $best_match);
}
|
||||
|
||||
/* --candidates-files arrives as one string with the paths separated by
 * single spaces. */
$candidates_files = explode(" ", $candidates_files);

/* Generate dictionary of candidates and LGAs:
 *   $candidate_data[council name]['_filename']  => path of the CSV read last
 *   $candidate_data[council name][name slug]    => CSV row keyed by header
 * A later file for the same council overrides rows with the same slug. */
$candidate_data = [];
foreach ($candidates_files as $file) {
    $config_file = dirname($file)."/config.json";
    $config_string = file_get_contents($config_file);

    if ($config_string === FALSE) {
        error_log("Error opening config.json.");
        exit(1);
    }

    $config = json_decode($config_string, true);
    if (!is_array($config) || !isset($config['councilName'])) {
        error_log("Error: '$config_file' does not define councilName.");
        exit(1);
    }
    $council = $config['councilName'];

    $candidate_data[$council]['_filename'] = $file;

    if (($handle = fopen($file, "r")) === FALSE) {
        /* Keep going with the other files, but say which one was skipped. */
        error_log("Error opening candidates file '$file'.");
        continue;
    }

    $headers = fgetcsv($handle);
    if (is_array($headers)) {
        while (($data = fgetcsv($handle)) !== FALSE) {
            /* fgetcsv() reports a blank line as array(null); nothing to store. */
            if ($data === array(null)) {
                continue;
            }

            $candidate = [];
            foreach ($headers as $key => $value) {
                /* Rows shorter than the header get empty trailing fields. */
                $candidate[$value] = isset($data[$key]) ? $data[$key] : '';
            }
            $name_slug = trim_sluggify($candidate['Candidate Name']);
            $candidate_data[$council][$name_slug] = $candidate;
        }
    }

    fclose($handle);
}
|
||||
|
||||
/* Collect the LGA names used as top-level keys of the results data. */
$vec_lga_names = array();
foreach ($results as $name => $unused_wards) {
    array_push($vec_lga_names, $name);
}
|
||||
|
||||
/*
 * Tell whether $candidate appears among the names of any ward.
 *
 * $vec_wards is an array of name lists, one per ward. The candidate counts
 * as elected as soon as match_words() gives a score above 100 for one ward.
 */
function was_elected($candidate, $vec_wards) {
    foreach ($vec_wards as $vec_candidates) {
        $result = match_words($candidate, $vec_candidates);
        if ($result[0] > 100) {
            return true;
        }
    }
    return false;
}
|
||||
|
||||
/* For every council, write candidates-elected.csv next to its candidates
 * file, listing the database candidates that match an elected name. */
$header = ["Ward", "Candidate Name", "Elected"];

foreach ($candidate_data as $lga => $db_candidates) {
    /* Find LGA in results dict */
    list($score, $vec_lga_name) = match_words($lga, $vec_lga_names);
    if (!isset($results[$vec_lga_name])) {
        /* match_words() returned "no match": there are no wards to check. */
        error_log("Warning: no results found for '$lga', skipping.");
        continue;
    }
    $vec_wards = $results[$vec_lga_name];

    /* '_filename' is bookkeeping, not a candidate: use it for the output
     * path and drop it from this iteration's copy of the list. */
    $output_file = dirname($db_candidates['_filename'])."/candidates-elected.csv";
    unset($db_candidates['_filename']);

    /* Go through database candidates and build list of elected candidates */
    $elected = [];
    foreach ($db_candidates as $candidate) {
        if (was_elected($candidate['Candidate Name'], $vec_wards)) {
            $elected[] = $candidate;
        }
    }

    /* Don't create file if none were elected. */
    if (count($elected) === 0) continue;

    if (($handle = fopen($output_file, "w")) === FALSE) {
        error_log('Error opening output file');
        exit(1);
    }

    if (fputcsv($handle, $header) === FALSE) {
        error_log('Error writing headers to output file');
        exit(3);
    }

    foreach ($elected as $candidate) {
        $line = array($candidate['Ward'], $candidate['Candidate Name'], "y");
        if (fputcsv($handle, $line) === FALSE) {
            error_log('Error writing candidate to output file');
            exit(3);
        }
    }

    fclose($handle);
}

exit(0);
|
||||
53
results/parser.py
Normal file
53
results/parser.py
Normal file
@@ -0,0 +1,53 @@
|
||||
from bs4 import BeautifulSoup, Tag as HTMLTag
|
||||
import json, re, argparse
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('filenames', nargs='*')
|
||||
args = parser.parse_args()
|
||||
|
||||
def get_vacancies(ward):
    """Read the ward name and number of vacancies from a ward's heading.

    ``ward`` is a node whose grandparent has an ``h2`` child. The heading text
    is expected to look like ``"<ward name> (<N>..."``.

    Returns a ``(ward_name, vacancies, ward)`` tuple, with ``ward`` passed
    through unchanged.

    Raises ValueError if the heading contains no ``(<number>`` part.
    """
    text = ward.parent.parent.h2.text
    # Everything before the first "(" is the ward name.
    ward_name = re.search(r"[^(]*", text)[0].strip()
    count = re.search(r"\(([0-9]+)", text)
    if count is None:
        raise ValueError(f"no vacancy count in heading {text!r}")
    return (ward_name, int(count[1]), ward)
|
||||
|
||||
def get_candidate_names(ward_desc):
    """Collect the candidate names listed after a ward's heading.

    ``ward_desc`` is a tuple as returned by ``get_vacancies``; its third item
    is the heading node. Every tag sibling that follows the heading's parent
    is searched for ``td`` cells with class ``list-item-body``, and the first
    line of each cell's stripped text is kept.
    """
    heading = ward_desc[2]
    tag_siblings = (node for node in heading.parent.next_siblings
                    if isinstance(node, HTMLTag))

    names = []
    for node in tag_siblings:
        for cell in node.find_all('td', class_="list-item-body"):
            names.append(re.sub('\n.*', '', cell.text.strip()))
    return names
|
||||
|
||||
def parse_lga(filename):
    """Parse one saved LGA results page.

    Returns a dict mapping each ward name to the list of candidate names found
    for its "Successful candidates" or "Elected candidates" heading.

    Raises ValueError when the number of names found for a ward differs from
    the vacancy count in that ward's heading.
    """
    with open(filename, 'r') as results_fp:
        html_doc = results_fp.read()

    soup = BeautifulSoup(html_doc, 'html.parser')

    # Both heading wordings are searched; "Successful" matches come first.
    headings = [
        *soup.find_all(string="Successful candidates"),
        *soup.find_all(string="Elected candidates"),
    ]
    ward_info = [get_vacancies(heading) for heading in headings]

    results = {}
    for ward in ward_info:
        ward_name, vacancies, _ = ward
        names = get_candidate_names(ward)
        # A real exception rather than assert, so the check survives
        # "python -O" and reports which ward is inconsistent.
        if len(names) != vacancies:
            raise ValueError(
                f"{filename}: ward {ward_name!r} has {vacancies} vacancies "
                f"but {len(names)} candidate names were found")
        results[ward_name] = names

    return results
|
||||
|
||||
# Key each LGA's results by its file name alone. results/fetch.sh saves pages
# directly under html/, so strip any directory prefix rather than only
# "html/lgas/".
all_results = {}
for lga in args.filenames:
    lga_name = lga.rsplit('/', 1)[-1]
    all_results[lga_name] = parse_lga(lga)

print(json.dumps(all_results, indent=4))
|
||||
22
update-elected.sh
Executable file
22
update-elected.sh
Executable file
@@ -0,0 +1,22 @@
|
||||
#!/bin/bash

# Collects the candidate CSV files found under DATA_PATH and hands them,
# together with the results file, to results/gen-elected.php.
#
# This script uses the jq, wp, and php commands, make sure they are installed
# before running this script.

# The folder containing data for each council.
# Includes the list of candidates and any media.
DATA_PATH="../spl-data"

if ! command -v php >/dev/null; then
  echo "Error: the php command is required but was not found." >&2
  exit 1
fi

# Iterate over folders in data path
candidates_files=()
for folder in "$DATA_PATH"/*; do
  if [[ -f "$folder/candidates-generic.csv" ]]; then
    candidates_files+=("$folder/candidates-generic.csv")
  fi
  # Community groups get priority: their file is listed after the generic one.
  if [[ -f "$folder/candidates.csv" ]]; then
    candidates_files+=("$folder/candidates.csv")
  fi
done

if (( ${#candidates_files[@]} == 0 )); then
  echo "Error: no candidates files found under $DATA_PATH." >&2
  exit 1
fi

# gen-elected.php splits --candidates-files on single spaces, so the array is
# deliberately joined into one argument with [*].
php results/gen-elected.php --candidates-files "${candidates_files[*]}" \
  --results-file "$DATA_PATH/results.json"
|
||||
Reference in New Issue
Block a user