Merge branch 'results'
This commit is contained in:
15
results/fetch.sh
Executable file
15
results/fetch.sh
Executable file
@@ -0,0 +1,15 @@
|
|||||||
|
#!/bin/bash
#
# Download the VEC 2024 council election results index, then download the
# results page of every council linked from it into html/.

base_url="https://www.vec.vic.gov.au"

mkdir -p html || exit 1

# Without the index there is nothing to iterate over, so stop here.
if ! wget "$base_url/results/2024-council-election-results" -O html/lga_list.html; then
  echo "Error: could not download the LGA list." >&2
  exit 1
fi

status=0

# Read the matching lines one at a time so that whitespace inside the markup
# can never split a single link into several words.
while IFS= read -r line; do
  # Keep what follows the last href=" on the line, up to the first ">.
  path=${line##*href=\"}
  path=${path%%\">*}
  lga="$base_url$path"

  # The local file name is the URL part between "elections/" and "/results".
  file=${lga##*elections/}
  file=${file/\/results/}

  # A name that is empty or still contains a slash means the URL did not have
  # the expected shape; writing to html/$file would fail or land elsewhere.
  if [[ -z "$file" || "$file" == */* ]]; then
    echo "Warning: unexpected results URL '$lga', skipping." >&2
    status=1
    continue
  fi

  # Keep going after a failed download so one bad page does not block the rest.
  if ! wget "$lga" -O "html/$file"; then
    echo "Warning: failed to download $lga" >&2
    status=1
  fi
done < <(grep 'href="/voting/.*/results"' html/lga_list.html)

exit "$status"
|
||||||
142
results/gen-elected.php
Normal file
142
results/gen-elected.php
Normal file
@@ -0,0 +1,142 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/*
 * Command-line options (both required):
 *   --candidates-files  space-separated list of candidate CSV paths
 *   --results-file      path to the JSON file holding the election results
 */
$options = getopt("", ["candidates-files:", "results-file:"]);

if (!isset($options['candidates-files'])) {
    error_log("Error: Missing required option '--candidates-files'.");
    exit(1);
}
$candidates_files = $options['candidates-files'];

if (!isset($options['results-file'])) {
    error_log("Error: Missing required option '--results-file'.");
    exit(1);
}
$results_file = $options['results-file'];

$results_string = file_get_contents($results_file);
if ($results_string === FALSE) {
    error_log("Error opening results.json.");
    exit(1);
}

/* json_decode() yields NULL for malformed JSON; the rest of the script
   iterates over and indexes $results, so insist on an array here. */
$results = json_decode($results_string, true);
if (!is_array($results)) {
    error_log("Error: results file does not contain a JSON object.");
    exit(1);
}
|
||||||
|
|
||||||
|
/**
 * Turn a display name into a slug.
 *
 * Surrounding whitespace is trimmed, every remaining space becomes "-" and
 * the result is lower-cased. Runs of spaces are not collapsed.
 *
 * @param string $input Text to convert.
 * @return string The slug.
 */
function trim_sluggify($input) {
    return strtolower(str_replace(' ', '-', trim($input)));
}
|
||||||
|
|
||||||
|
/**
 * Match database names to VEC names.
 *
 * Both strings are lower-cased and split at every character outside a-z.
 * Each token of $words is compared with each token of a list entry using
 * similar_text(); a similarity above 70 percent adds that percentage to the
 * entry's score, anything else subtracts 1. The entry with the highest
 * score wins.
 *
 * @param string   $words Name to look for.
 * @param string[] $list  Names to choose from.
 * @return array [score, best match]; [0, "no match"] when no entry scores
 *               above zero.
 */
function match_words($words, $list) {
    $max_score = 0;
    $best_match = "no match";

    /* The tokens of $words are the same for every list entry. */
    $aa = preg_split("/[^a-z]/", strtolower($words));

    foreach ($list as $possible_match) {
        $bb = preg_split("/[^a-z]/", strtolower($possible_match));

        $score_sum = 0;
        foreach ($aa as $a) {
            foreach ($bb as $b) {
                similar_text($a, $b, $score);
                if ($score > 70) $score_sum += $score;
                else $score_sum -= 1;
            }
        }

        if ($score_sum > $max_score) {
            $max_score = $score_sum;
            $best_match = $possible_match;
        }
    }
    return array($max_score, $best_match);
}
|
||||||
|
|
||||||
|
$candidates_files = explode(" ", $candidates_files);

/* Generate dictionary of candidates and LGAs.
 *
 * $candidate_data[council name] holds one entry per candidate, keyed by the
 * slug of the candidate's name, plus a '_filename' entry with the CSV path.
 * The council name comes from the config.json next to each CSV. A later file
 * for the same council overwrites '_filename' and any candidate with the
 * same slug.
 */
$candidate_data = [];
foreach ($candidates_files as $file) {
    $config_file = dirname($file)."/config.json";
    $config_string = file_get_contents($config_file);

    if ($config_string !== FALSE) {
        $config = json_decode($config_string, true);
    } else {
        error_log("Error opening config.json.");
        exit(1);
    }

    /* json_decode() gives NULL for malformed JSON, and the council name is
       needed as the array key below. */
    if (!is_array($config) || !isset($config['councilName'])) {
        error_log("Error: no councilName found in ".$config_file.".");
        exit(1);
    }
    $council_name = $config['councilName'];

    $candidate_data[$council_name]['_filename'] = $file;

    if (($handle = fopen($file, "r")) !== FALSE) {
        $headers = fgetcsv($handle);

        /* An empty file has no header row, so there is nothing to read. */
        if (is_array($headers)) {
            while (($data = fgetcsv($handle)) !== FALSE) {
                /* fgetcsv() reports a blank line as an array holding a single NULL. */
                if ($data === [null]) continue;

                $candidate = [];
                foreach ($headers as $key => $value) {
                    /* Rows shorter than the header get NULL for the missing cells. */
                    $candidate[$value] = array_key_exists($key, $data) ? $data[$key] : null;
                }
                $name_slug = trim_sluggify($candidate['Candidate Name']);
                $candidate_data[$council_name][$name_slug] = $candidate;
            }
        }

        fclose($handle);
    } else {
        error_log("Warning: could not open ".$file.", skipping.");
    }
}
|
||||||
|
|
||||||
|
/* The LGA names as they appear in the results file are its top-level keys. */
$vec_lga_names = array_keys($results);
|
||||||
|
|
||||||
|
/**
 * Decide whether a candidate appears in any ward's list of names.
 *
 * The candidate's name is scored against each ward's list with
 * match_words(); a best score above 100 in any ward counts as elected.
 *
 * @param string $candidate Candidate name from the database.
 * @param array  $vec_wards Lists of names, one list per ward.
 * @return bool True if some ward produces a score above 100.
 */
function was_elected($candidate, $vec_wards) {
    foreach ($vec_wards as $vec_candidates) {
        list($score) = match_words($candidate, $vec_candidates);
        if ($score > 100) return true;
    }
    return false;
}
|
||||||
|
|
||||||
|
/* Column titles of every candidates-elected.csv written below. */
$header = ["Ward", "Candidate Name", "Elected"];

foreach ($candidate_data as $lga => $db_candidates) {
    /* Find LGA in results dict */
    list($score, $vec_lga_name) = match_words($lga, $vec_lga_names);

    /* match_words() answers "no match" when nothing scored above zero.
       Indexing $results with that would give an undefined-key notice and a
       NULL ward list, so skip the council instead. */
    if (!isset($results[$vec_lga_name]) || !is_array($results[$vec_lga_name])) {
        error_log("Warning: no results found for '".$lga."', skipping.");
        continue;
    }
    $vec_wards = $results[$vec_lga_name];

    $elected = [];
    /* Go through database candidates and build list of elected candidates */
    foreach ($db_candidates as $key => $value) {
        if ($key === '_filename') {
            /* The output file is written next to the candidates CSV. */
            $output_file = dirname($value)."/candidates-elected.csv";
            continue;
        }
        if (was_elected($value['Candidate Name'], $vec_wards)) {
            $elected[] = $value;
        }
    }

    /* Don't create file if none were elected. */
    if (count($elected) === 0) continue;

    if (($handle = fopen($output_file, "w")) === FALSE) {
        error_log('Error opening output file');
        exit(1);
    }

    if (fputcsv($handle, $header) === FALSE) {
        error_log('Error writing headers to output file');
        exit(3);
    }

    foreach ($elected as $candidate) {
        $line = array($candidate['Ward'], $candidate['Candidate Name'], "y");
        if (fputcsv($handle, $line) === FALSE) {
            error_log('Error writing candidate to output file');
            exit(3);
        }
    }

    fclose($handle);
}

exit(0);
|
||||||
53
results/parser.py
Normal file
53
results/parser.py
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
from bs4 import BeautifulSoup, Tag as HTMLTag
|
||||||
|
import json, re, argparse
|
||||||
|
|
||||||
|
# Command line: zero or more saved results pages to parse.
parser = argparse.ArgumentParser()
parser.add_argument('filenames', nargs='*', help='HTML results files to parse')
args = parser.parse_args()
|
||||||
|
|
||||||
|
def get_vacancies(ward):
    """Read the ward name and vacancy count from the heading above a ward.

    The heading is the text of the ``h2`` under ``ward.parent.parent``. The
    name is whatever precedes the first "("; the vacancy count is the run of
    digits directly after the first "(" that is followed by a digit.

    Returns a ``(ward_name, vacancies, ward)`` tuple.
    Raises ``ValueError`` if the heading holds no "(<digits>" part.
    """
    text = ward.parent.parent.h2.text
    # Raw strings: "\(" in a normal string literal is an invalid escape.
    ward_name = re.search(r"[^(]*", text)[0].strip()
    count = re.search(r"\(([0-9]+)", text)
    if count is None:
        raise ValueError(f"no vacancy count found in heading {text!r}")
    return (ward_name, int(count[1]), ward)
|
||||||
|
|
||||||
|
def get_candidate_names(ward_desc):
    """Collect the names listed after a ward's marker text.

    ``ward_desc`` is a tuple as returned by ``get_vacancies``; only its third
    item is used. Every tag that follows that item's parent as a sibling is
    searched for ``td`` cells with class ``list-item-body``, and the first
    line of each cell's stripped text is taken as a name.

    Returns the names as a list, in document order.
    """
    # NOTE(review): the source's indentation was lost; nesting is reconstructed
    # from where `blocks` and `names` are used - confirm.
    names = []
    for sibling in ward_desc[2].parent.next_siblings:
        # Strings between tags are siblings too; only tags can be searched.
        if not isinstance(sibling, HTMLTag):
            continue
        if not (blocks := sibling.find_all('td', class_="list-item-body")):
            continue
        for block in blocks:
            # Drop everything after the first line of the cell.
            names.append(re.sub('\n.*', '', block.text.strip()))
    return names
|
||||||
|
|
||||||
|
def parse_lga(filename):
    """Parse one saved results page into ``{ward name: [candidate names]}``.

    Wards are located through the exact strings "Successful candidates" and
    "Elected candidates". For each ward, the number of names found must equal
    the vacancy count in its heading; otherwise an ``AssertionError`` is
    raised.
    """
    with open(filename, 'r') as results_fp:
        html_doc = results_fp.read()

    soup = BeautifulSoup(html_doc, 'html.parser')
    wards0 = soup.find_all(string="Successful candidates")
    wards1 = soup.find_all(string="Elected candidates")

    ward_info = [get_vacancies(ward) for group in (wards0, wards1) for ward in group]

    results = {}
    for ward in ward_info:
        names = get_candidate_names(ward)
        # Say which page and ward disagree, so a bad scrape is easy to trace.
        assert len(names) == ward[1], (
            f"{filename}: {ward[0]!r} lists {len(names)} names "
            f"but has {ward[1]} vacancies"
        )
        results[ward[0]] = names

    return results
|
||||||
|
|
||||||
|
# Parse every file given on the command line and print the combined results
# as JSON, keyed by file name with any 'html/lgas/' part removed.
# NOTE(review): fetch.sh in this commit saves pages under html/, not
# html/lgas/ - confirm which directory the files are passed from.
all_results = {}
for lga in args.filenames:
    lga_name = re.sub('html/lgas/', '', lga)
    results = parse_lga(lga)
    all_results[lga_name] = results

print(json.dumps(all_results, indent=4))
|
||||||
22
update-elected.sh
Executable file
22
update-elected.sh
Executable file
@@ -0,0 +1,22 @@
|
|||||||
|
#!/bin/bash

# Collect every council's candidate CSV files and pass them, together with the
# results file, to results/gen-elected.php.
# Requires the php command (the only external tool invoked below); make sure
# it is installed before running this script.

# The folder containing data for each council.
# Includes the list of candidates and any media.
DATA_PATH="../spl-data"

# Iterate over folders in data path
candidates_files=()
for folder in "$DATA_PATH"/*; do
  if [[ -f "$folder/candidates-generic.csv" ]]; then
    candidates_files+=("$folder/candidates-generic.csv")
  fi
  # Community groups get priority
  if [[ -f "$folder/candidates.csv" ]]; then
    candidates_files+=("$folder/candidates.csv")
  fi
done

# With no files the PHP script would be given an empty path to work on.
if (( ${#candidates_files[@]} == 0 )); then
  echo "Error: no candidate files found under $DATA_PATH." >&2
  exit 1
fi

# gen-elected.php splits the --candidates-files value on single spaces, so the
# paths are joined into one space-separated argument.
php results/gen-elected.php --candidates-files "${candidates_files[*]}" \
  --results-file "$DATA_PATH/results.json"
|
||||||
Reference in New Issue
Block a user