Merge branch 'results'

This commit is contained in:
Kim Taylor
2024-11-16 14:47:14 +11:00
4 changed files with 232 additions and 0 deletions

15
results/fetch.sh Executable file
View File

@@ -0,0 +1,15 @@
#!/bin/bash
# Download the VEC 2024 council election results index page, then the results
# page of every council linked from it.
#
# Outputs: html/lga_list.html (the index) and html/<name> for each linked
#          results page, where <name> is the URL with everything through
#          "elections/" and the "/results" component removed.
# Returns: non-zero if the index or any linked page could not be downloaded.

readonly BASE_URL='https://www.vec.vic.gov.au'
readonly HREF_OPEN='href="'
readonly HREF_CLOSE='">'

mkdir -p html || exit 1

# Without the index there is nothing to iterate over, so stop here rather than
# silently producing no output.
if ! wget "${BASE_URL}/results/2024-council-election-results" -O html/lga_list.html; then
  echo "Error: failed to download the council list." >&2
  exit 1
fi

failures=0
# Read the matching lines one at a time: this keeps each line intact without
# changing IFS globally and without word splitting or glob expansion.
while IFS= read -r line; do
  # Keep what lies between the last href=" and the first "> that follows it.
  path=${line##*"$HREF_OPEN"}
  path=${path%%"$HREF_CLOSE"*}
  url="${BASE_URL}${path}"

  # Output name: drop everything through "elections/" and the first
  # "/results" component.
  file=${url##*elections/}
  file=${file/\/results/}

  # One failed page should not prevent the remaining pages from being fetched.
  if ! wget "$url" -O "html/${file}"; then
    echo "Warning: failed to download ${url}" >&2
    failures=$((failures + 1))
  fi
done < <(grep 'href="/voting/.*/results"' html/lga_list.html)

if (( failures > 0 )); then
  exit 1
fi

142
results/gen-elected.php Normal file
View File

@@ -0,0 +1,142 @@
<?php
/*
 * Command-line handling.
 *
 * Both --candidates-files and --results-file are required and take a value.
 * The results file is read and decoded into $results (associative arrays);
 * the script stops with exit status 1 when an option is missing or the
 * results file cannot be read or decoded.
 */
$options = getopt("", ["candidates-files:", "results-file:"]);
if (!isset($options['candidates-files'])) {
    error_log("Error: Missing required option '--candidates-files'.");
    exit(1);
}
if (!isset($options['results-file'])) {
    error_log("Error: Missing required option '--results-file'.");
    exit(1);
}
$candidates_files = $options['candidates-files'];
$results_file = $options['results-file'];

$results_string = file_get_contents($results_file);
if ($results_string === FALSE) {
    error_log("Error opening results.json.");
    exit(1);
}

$results = json_decode($results_string, true);
/* json_decode() returns NULL for malformed JSON; the code further down
 * iterates over $results, so anything but an array is unusable. */
if (!is_array($results)) {
    error_log("Error: results file is not a valid JSON object (".json_last_error_msg().").");
    exit(1);
}
/**
 * Turn a name into a slug: surrounding whitespace is trimmed, every space
 * becomes a hyphen and the result is lower-cased.
 *
 * @param string $input Text to convert.
 * @return string The slug.
 */
function trim_sluggify($input) {
    $trimmed = trim($input);
    $hyphenated = str_replace(' ', '-', $trimmed);
    return strtolower($hyphenated);
}
/**
 * Match database names to VEC names.
 *
 * Both strings are lower-cased and split on every character that is not
 * a-z. Each word of $words is compared with each word of a candidate using
 * similar_text(): a pair scoring above 70 percent adds its percentage to the
 * candidate's total, every other pair subtracts 1. Consecutive separators
 * produce empty words, which take part in the comparison like any other.
 *
 * @param string $words Name to look up.
 * @param array  $list  Candidate names to compare against.
 * @return array Two elements: the best total and the candidate that reached
 *               it. array(0, "no match") when no candidate totals above 0.
 */
function match_words($words, $list) {
    $max_score = 0;
    $best_match = "no match";
    /* The words being looked up do not depend on the candidate: split once. */
    $query_words = preg_split("/[^a-z]/", strtolower($words));
    foreach ($list as $possible_match) {
        $candidate_words = preg_split("/[^a-z]/", strtolower($possible_match));
        $score_sum = 0;
        foreach ($query_words as $a) {
            foreach ($candidate_words as $b) {
                similar_text($a, $b, $score);
                if ($score > 70) {
                    $score_sum += $score;
                } else {
                    $score_sum -= 1;
                }
            }
        }
        /* Strictly greater: on a tie the earlier candidate is kept. */
        if ($score_sum > $max_score) {
            $max_score = $score_sum;
            $best_match = $possible_match;
        }
    }
    return array($max_score, $best_match);
}
/* --candidates-files carries one space-separated list of CSV paths. */
$candidates_files = explode(" ", $candidates_files);
/* Generate dictionary of candidates and LGAs:
 * $candidate_data[councilName]['_filename'] is the CSV path, and
 * $candidate_data[councilName][name slug] is one CSV row keyed by header. */
$candidate_data = [];
foreach ($candidates_files as $file) {
    /* The council name comes from the config.json next to the CSV file. */
    $config_file = dirname($file)."/config.json";
    $config_string = file_get_contents($config_file);
    if ($config_string === FALSE) {
        error_log("Error opening config.json.");
        exit(1);
    }
    $config = json_decode($config_string, true);
    if (!is_array($config) || !isset($config['councilName'])) {
        /* Without a council name the rows cannot be filed anywhere. */
        error_log("Error: no councilName in ".$config_file.", skipping ".$file.".");
        continue;
    }
    $council = $config['councilName'];
    $candidate_data[$council]['_filename'] = $file;

    $handle = fopen($file, "r");
    if ($handle === FALSE) {
        error_log("Error opening ".$file.".");
        continue;
    }
    $headers = fgetcsv($handle);
    /* fgetcsv() returns FALSE on an empty file: there are no rows to read. */
    if (is_array($headers)) {
        while (($data = fgetcsv($handle)) !== FALSE) {
            /* A blank line is returned as an array holding a single NULL. */
            if ($data === [null]) {
                continue;
            }
            $candidate = [];
            foreach ($headers as $key => $value) {
                /* Rows shorter than the header get NULL for missing cells. */
                $candidate[$value] = isset($data[$key]) ? $data[$key] : null;
            }
            if (!isset($candidate['Candidate Name'])) {
                continue;
            }
            $name_slug = trim_sluggify($candidate['Candidate Name']);
            $candidate_data[$council][$name_slug] = $candidate;
        }
    }
    fclose($handle);
}
/* Collect the top-level keys of the results, i.e. the LGA names they use. */
$vec_lga_names = [];
foreach ($results as $vec_name => $unused_wards) {
    array_push($vec_lga_names, $vec_name);
}
/**
 * Report whether a candidate name matches a name in any of the given wards.
 *
 * @param string $candidate Candidate name to look for.
 * @param array  $vec_wards Lists of names, one list per ward.
 * @return bool true as soon as one ward yields a match_words() score above
 *              100, false when none does.
 */
function was_elected($candidate, $vec_wards) {
    foreach ($vec_wards as $ward_names) {
        $result = match_words($candidate, $ward_names);
        if ($result[0] > 100) {
            return true;
        }
    }
    return false;
}
/*
 * For every council in $candidate_data, write a candidates-elected.csv next
 * to its candidates file, listing the database candidates whose name matches
 * a name in the council's results. Councils with no elected candidates get
 * no file. Exit status: 1 if the output file cannot be opened, 3 if a write
 * fails, 0 otherwise.
 */
$header = ["Ward", "Candidate Name", "Elected"];
foreach ($candidate_data as $lga => $db_candidates) {
    /* Find LGA in results dict */
    list($score, $vec_lga_name) = match_words($lga, $vec_lga_names);
    /* match_words() answers "no match" when nothing scores above 0; that is
     * not a key of $results, so there is nothing to compare against. */
    if (!isset($results[$vec_lga_name]) || !is_array($results[$vec_lga_name])) {
        error_log("Warning: no results found for '".$lga."', skipping.");
        continue;
    }
    $vec_wards = $results[$vec_lga_name];
    $elected = [];
    /* Go through database candidates and build list of elected candidates */
    foreach ($db_candidates as $key => $value) {
        /* '_filename' is bookkeeping, not a candidate: it holds the CSV path
         * and decides where the output goes. */
        if ($key === '_filename') {
            $output_file = dirname($value)."/candidates-elected.csv";
            continue;
        }
        if (!isset($value['Candidate Name'])) {
            continue;
        }
        if (was_elected($value['Candidate Name'], $vec_wards)) {
            $elected[] = $value;
        }
    }
    /* Don't create file if none were elected. */
    if (count($elected) === 0) {
        continue;
    }
    $handle = fopen($output_file, "w");
    if ($handle === FALSE) {
        error_log('Error opening output file');
        exit(1);
    }
    if (fputcsv($handle, $header) === FALSE) {
        error_log('Error writing headers to output file');
        exit(3);
    }
    foreach ($elected as $candidate) {
        /* A candidates file without a Ward column yields an empty cell. */
        $ward = isset($candidate['Ward']) ? $candidate['Ward'] : "";
        $line = array($ward, $candidate['Candidate Name'], "y");
        if (fputcsv($handle, $line) === FALSE) {
            error_log('Error writing candidate to output file');
            exit(3);
        }
    }
    fclose($handle);
}
exit(0);

53
results/parser.py Normal file
View File

@@ -0,0 +1,53 @@
from bs4 import BeautifulSoup, Tag as HTMLTag
import json, re, argparse
# Command line: zero or more file names, collected in args.filenames.
parser = argparse.ArgumentParser()
parser.add_argument("filenames", nargs="*")
args = parser.parse_args()
def get_vacancies(ward):
    """Return ``(ward_name, vacancies, ward)`` for a results heading node.

    The text is taken from the ``h2`` of the node's grandparent. The ward name
    is everything before the first "(", stripped of surrounding whitespace;
    the vacancy count is the first run of digits that directly follows a "(".

    Raises:
        ValueError: if the heading text contains no "(<digits>" group.
    """
    text = ward.parent.parent.h2.text
    ward_name = text.partition("(")[0].strip()
    # Raw string: "\(" inside a plain literal is an invalid escape sequence.
    count = re.search(r"\(([0-9]+)", text)
    if count is None:
        raise ValueError(f"no vacancy count found in heading {text!r}")
    return (ward_name, int(count.group(1)), ward)
def get_candidate_names(ward_desc):
    """Return the candidate names listed after a ward's heading.

    ``ward_desc`` is a tuple as returned by ``get_vacancies``; only its third
    element (the heading node) is used. Every tag that follows the heading's
    parent as a sibling is searched for ``td`` cells with class
    ``list-item-body``; the first line of each cell's stripped text is taken
    as the name.
    """
    heading_parent = ward_desc[2].parent
    # Non-tag siblings (e.g. bare strings between tags) are ignored.
    tag_siblings = (
        node for node in heading_parent.next_siblings
        if isinstance(node, HTMLTag)
    )
    return [
        re.sub('\n.*', '', cell.text.strip())
        for node in tag_siblings
        for cell in node.find_all('td', class_="list-item-body")
    ]
def parse_lga(filename):
    """Parse one saved results page into ``{ward_name: [candidate names]}``.

    Wards are located through the strings "Successful candidates" and
    "Elected candidates" (in that order). For each ward the number of names
    found must equal the vacancy count from its heading; an AssertionError is
    raised otherwise.
    """
    with open(filename, 'r') as results_fp:
        soup = BeautifulSoup(results_fp.read(), 'html.parser')

    ward_info = [
        get_vacancies(heading)
        for label in ("Successful candidates", "Elected candidates")
        for heading in soup.find_all(string=label)
    ]

    results = {}
    for info in ward_info:
        ward_name, vacancies = info[0], info[1]
        names = get_candidate_names(info)
        assert len(names) == vacancies
        results[ward_name] = names
    return results
# Parse every file named on the command line and print the combined results
# as JSON, keyed by the file name with each 'html/lgas/' occurrence removed.
all_results = {}
for path in args.filenames:
    all_results[re.sub('html/lgas/', '', path)] = parse_lga(path)
print(json.dumps(all_results, indent=4))

22
update-elected.sh Executable file
View File

@@ -0,0 +1,22 @@
#!/bin/bash
# Collect the candidates CSV files of every folder under DATA_PATH and pass
# them, together with DATA_PATH/results.json, to results/gen-elected.php.
#
# Requires the php command. All paths are relative to the current directory,
# so run this from the directory that contains results/.
# Returns: 1 if no candidates file is found, otherwise the exit status of php.

# The folder containing data for each council.
# Includes the list of candidates and any media.
readonly DATA_PATH="../spl-data"

# Iterate over folders in data path
candidates_files=()
for folder in "$DATA_PATH"/*; do
  if [[ -f "$folder/candidates-generic.csv" ]]; then
    candidates_files+=("$folder/candidates-generic.csv")
  fi
  # Community groups get priority (their file is listed after the generic one)
  if [[ -f "$folder/candidates.csv" ]]; then
    candidates_files+=("$folder/candidates.csv")
  fi
done

# Calling php with an empty list would only fail later with an unrelated
# error, so stop here with a clear message.
if (( ${#candidates_files[@]} == 0 )); then
  echo "Error: no candidates files found under $DATA_PATH." >&2
  exit 1
fi

# gen-elected.php takes the list as ONE space-separated argument.
# "${array[*]}" joins on the first character of IFS, so pin IFS to a space
# for the join instead of relying on the ambient value.
joined_files=$(IFS=' '; printf '%s' "${candidates_files[*]}")

php results/gen-elected.php --candidates-files "$joined_files" \
  --results-file "$DATA_PATH/results.json"