Results parser working for all LGAs (except Melbourne)
This commit is contained in:
@@ -1,5 +1,15 @@
|
||||
#!/bin/bash
# Mirror the VEC 2024 council election results pages.
#
# Saves the statewide index to html/lga_list.html, then saves every
# per-council results page linked from it to html/<council-slug>. The single
# council named in council_name is additionally saved to ./<council_name>.
# Requires: wget, grep.

base_url=https://www.vec.vic.gov.au
council_name=boroondara-city-council

mkdir -p html

# Everything below depends on the index, so stop if it cannot be fetched.
if ! wget "$base_url/results/2024-council-election-results" -O html/lga_list.html; then
  printf 'Error: could not download the council list\n' >&2
  exit 1
fi

wget "$base_url/voting/2024-local-council-elections/$council_name/results" -O "$council_name"

# Read the matching index lines one at a time rather than word-splitting an
# unquoted variable: IFS stays untouched and glob characters in the HTML are
# never expanded against the current directory.
while IFS= read -r line; do
  # Keep the text between the last href=" on the line and the first "> after it.
  path=${line##*href=\"}
  path=${path%%\">*}
  url="$base_url$path"

  # .../elections/<council-slug>/results -> <council-slug>
  file=${url##*elections/}
  file=${file/\/results/}

  wget "$url" -O "html/$file"
done < <(grep 'href="/voting/.*/results"' html/lga_list.html)
|
||||
|
||||
158
results/gen-elected.php
Normal file
158
results/gen-elected.php
Normal file
@@ -0,0 +1,158 @@
|
||||
<?php
|
||||
|
||||
//require_once("parse_generic_csv.php");
|
||||
|
||||
/* Read the command line; --candidates-files is mandatory and takes a value. */
$options = getopt("", ["candidates-files:"]);

if (!isset($options['candidates-files'])) {
    error_log("Error: Missing required option '--candidates-files'.");
    exit(1);
}

$candidates_files = $options['candidates-files'];
|
||||
|
||||
/**
 * Turn free text into a lookup slug.
 *
 * Surrounding whitespace is trimmed, every remaining space becomes a hyphen
 * and the result is lower-cased.
 *
 * @param string $input Text to convert.
 * @return string The slug.
 */
function trim_sluggify($input) {
    $trimmed = trim($input);
    $hyphenated = strtr($trimmed, ' ', '-');
    return strtolower($hyphenated);
}
|
||||
|
||||
/* The --candidates-files value is a single space-separated list of CSV paths. */
$candidates_files = explode(" ", $candidates_files);

/* Generate dictionary of candidates and LGAs */
$candidate_data = [];
foreach ($candidates_files as $file) {
    /* The council's config.json is looked up in the same folder as the CSV. */
    $config_file = dirname($file)."/config.json";
    $config_string = file_get_contents($config_file);
    if ($config_string === FALSE) {
        error_log("Error opening config.json.");
        exit(1);
    }

    /* Treat an undecodable config like an unreadable one instead of
       silently tagging every candidate with a null council. */
    $config = json_decode($config_string, true);
    if (!is_array($config)) {
        error_log("Error parsing ".$config_file.".");
        exit(1);
    }
    $council_name = isset($config['councilName']) ? $config['councilName'] : null;

    /* As before, a CSV that cannot be opened is skipped (PHP itself warns). */
    if (($handle = fopen($file, "r")) === FALSE) {
        continue;
    }

    $headers = fgetcsv($handle);
    if (is_array($headers)) {
        while (($data = fgetcsv($handle)) !== FALSE) {
            /* fgetcsv() reports a blank line as [null]; that is not a candidate. */
            if ($data === [null]) {
                continue;
            }

            $candidate = [];
            foreach ($headers as $key => $value) {
                /* Short rows leave trailing columns null rather than raising notices. */
                $candidate[$value] = isset($data[$key]) ? $data[$key] : null;
            }
            $candidate['Council'] = $council_name;

            $name = isset($candidate['Candidate Name']) ? $candidate['Candidate Name'] : '';
            $name_slug = trim_sluggify($name);
            /* Keyed by name slug: a later file replaces an earlier entry with the same name. */
            $candidate_data[$name_slug] = $candidate;
        }
    }

    /* Release the handle; the original left one open per file. */
    fclose($handle);
}

print_r($candidate_data);
|
||||
|
||||
/* Get list of elected candidates */
|
||||
|
||||
//$lga_list = [];
|
||||
/* Generate dictionary of LGAs and Wards */
|
||||
//foreach ($config_files as $config_file) {
|
||||
// $config_string = file_get_contents($config_file);
|
||||
// if ($config_string !== FALSE) {
|
||||
// $config = json_decode($config_string, true);
|
||||
// } else {
|
||||
// error_log("Error opening config.json.");
|
||||
// exit(1);
|
||||
// }
|
||||
// $config['config-file'] = $config_file;
|
||||
// $lga_list[] = $config;
|
||||
//}
|
||||
|
||||
/* Match user typed LGA/Ward to our database */
|
||||
//match_lga($candidate_data, $lga_list);
|
||||
|
||||
/* Column order for candidates-generic.csv. In the visible code only the
   commented-out generator further down reads this array. */
$header = ["Ward", "Candidate Name", "Rating", "Pledge", "Picture"];
|
||||
|
||||
/* Generate candidates-generic.csv */
|
||||
//foreach ($lga_list as $lga) {
|
||||
// $lga_candidates = array_filter($candidate_data, function ($candidate) use ($lga) {
|
||||
// return $candidate['match_lga'] === $lga['slug'];
|
||||
// });
|
||||
//
|
||||
// if (count($lga_candidates) === 0) continue;
|
||||
//
|
||||
// remove_duplicates($lga_candidates);
|
||||
//
|
||||
// $dir = dirname($lga['config-file']);
|
||||
// $dir_files = scandir($dir);
|
||||
// $output_file = $dir."/candidates-generic.csv";
|
||||
// $override_file = $dir."/candidates-override.csv";
|
||||
//
|
||||
// if (($handle = fopen($output_file, "w")) === FALSE) {
|
||||
// error_log('Error opening output file');
|
||||
// exit(1);
|
||||
// }
|
||||
//
|
||||
// if (fputcsv($handle, $header) === FALSE) {
|
||||
// error_log('Error writing headers to output file');
|
||||
// exit(3);
|
||||
// }
|
||||
//
|
||||
// $lines = [];
|
||||
// foreach ($lga_candidates as $candidate) {
|
||||
// /* Add extension to photo hash */
|
||||
// if (strlen($candidate['Photo'])) {
|
||||
// foreach ($dir_files as $file) {
|
||||
// if (preg_match("/\.json$/", $file)) continue;
|
||||
// if (strstr($file, $candidate['Photo'])) {
|
||||
// $candidate['Photo'] = $file;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// $lines[] = [
|
||||
// $candidate['match_ward'],
|
||||
// $candidate['Name'],
|
||||
// $candidate['Score'],
|
||||
// $candidate['Pledge'],
|
||||
// $candidate['Photo'],
|
||||
// ];
|
||||
// }
|
||||
//
|
||||
// /* Apply overrides if they exist */
|
||||
// $overrides = [];
|
||||
// if (file_exists($override_file)) {
|
||||
// if (($ovr_handle = fopen($override_file, "r")) !== FALSE) {
|
||||
// $headers = fgetcsv($ovr_handle);
|
||||
// while (($data = fgetcsv($ovr_handle)) !== FALSE) {
|
||||
// $override = [];
|
||||
// foreach ($headers as $key => $value) {
|
||||
// $override[$value] = $data[$key];
|
||||
// }
|
||||
// $overrides[] = $override;
|
||||
// }
|
||||
// fclose($ovr_handle);
|
||||
// } else {
|
||||
// error_log('Error opening overrides file');
|
||||
// exit(3);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// foreach ($overrides as $override) {
|
||||
// foreach ($lines as $line_key => $line) {
|
||||
// $match_index = array_search($override['Match Field'], $header);
|
||||
// $replace_index = array_search($override['Replace Field'], $header);
|
||||
// if ($line[$match_index] === $override['Match Value']) {
|
||||
// if ($replace_index !== false)
|
||||
// $lines[$line_key][$replace_index] = $override['Replace Value'];
|
||||
// else /* If 'Replace Field' is not matched - delete this entry */
|
||||
// $lines[$line_key]['Delete'] = 'y';
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// foreach ($lines as $line) {
|
||||
// if (isset($line['Delete'])) continue;
|
||||
// if (fputcsv($handle, $line) === FALSE) {
|
||||
// error_log('Error writing candidate to output file');
|
||||
// exit(3);
|
||||
// }
|
||||
// }
|
||||
// fclose($handle);
|
||||
//}
|
||||
|
||||
/* End the script with an explicit success status. */
exit(0);
|
||||
@@ -1,24 +1,53 @@
|
||||
from bs4 import BeautifulSoup, Tag as HTMLTag
|
||||
import json, re, argparse
|
||||
|
||||
with open("boroondara-city-council", 'r') as results_fp:
|
||||
html_doc = results_fp.read()
|
||||
# Command line: zero or more positional paths, collected into args.filenames.
# Each path is later handed to parse_lga(), which opens it and parses it as HTML.
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('filenames', nargs='*')
|
||||
args = parser.parse_args()
|
||||
|
||||
soup = BeautifulSoup(html_doc, 'html.parser')
|
||||
candidates0 = soup.find_all(string="Successful candidates")
|
||||
candidates1 = soup.find_all(string="Elected candidates")
|
||||
def get_vacancies(ward):
    """Read a ward's name and vacancy count from its section heading.

    The heading is the text of the ``h2`` found two parents above ``ward``.
    The name is everything before the first "(" and the vacancy count is the
    first run of digits that directly follows a "(".

    Args:
        ward: Node whose grandparent exposes ``h2.text``. The visible caller
            passes the text nodes returned by ``soup.find_all(string=...)``.

    Returns:
        Tuple ``(ward_name, vacancies, ward)``; ``ward`` is passed through
        unchanged so later steps can navigate from it.

    Raises:
        ValueError: If the heading contains no "(<digits>" vacancy count.
    """
    heading = ward.parent.parent.h2.text
    # Same result as matching "[^(]*" at the start of the heading.
    ward_name = heading.partition("(")[0].strip()
    # Raw string: "\(" in a normal literal is an invalid escape sequence.
    count = re.search(r"\(([0-9]+)", heading)
    if count is None:
        raise ValueError(f"no vacancy count found in heading {heading!r}")
    return (ward_name, int(count[1]), ward)
|
||||
|
||||
def get_candidate_name(candidate):
|
||||
for sibling in candidate.parent.next_siblings:
|
||||
def get_candidate_names(ward_desc):
|
||||
names = []
|
||||
for sibling in ward_desc[2].parent.next_siblings:
|
||||
if not isinstance(sibling, HTMLTag):
|
||||
continue
|
||||
if not (block := sibling.find('td', class_="list-item-body")):
|
||||
if not (blocks := sibling.find_all('td', class_="list-item-body")):
|
||||
continue
|
||||
return block.text.strip()
|
||||
for block in blocks:
|
||||
names.append(re.sub('\n.*', '', block.text.strip()))
|
||||
return names
|
||||
|
||||
names = []
|
||||
for candidate in candidates0:
|
||||
names.append(get_candidate_name(candidate))
|
||||
for candidate in candidates1:
|
||||
names.append(get_candidate_name(candidate))
|
||||
def parse_lga(filename):
    """Parse one saved council results page.

    Args:
        filename: Path of the HTML file to read.

    Returns:
        Dict mapping each ward name (as reported by ``get_vacancies``) to the
        list of names returned by ``get_candidate_names`` for that ward.

    Raises:
        AssertionError: If the number of names found for a ward differs from
            the vacancy count in its heading.
    """
    with open(filename, 'r') as results_fp:
        soup = BeautifulSoup(results_fp.read(), 'html.parser')

    # Both labels are searched; "Successful candidates" matches come first,
    # which keeps the original ordering of the result.
    markers = list(soup.find_all(string="Successful candidates"))
    markers.extend(soup.find_all(string="Elected candidates"))

    ward_info = [get_vacancies(marker) for marker in markers]

    results = {}
    for ward in ward_info:
        ward_name, vacancies = ward[0], ward[1]
        names = get_candidate_names(ward)
        # Cross-check the scrape against the heading's vacancy count.
        assert len(names) == vacancies, (
            f"{filename}: {ward_name}: found {len(names)} names "
            f"for {vacancies} vacancies"
        )
        results[ward_name] = names

    return results
|
||||
|
||||
# Parse every results page named on the command line and print a single JSON
# document keyed by council.
all_results = {}
for lga in args.filenames:
    # Key each council by the file's own name, whatever directory it is in.
    # Files under html/lgas/ get the same key as before.
    lga_name = lga.rsplit('/', 1)[-1]
    all_results[lga_name] = parse_lga(lga)

print(json.dumps(all_results, indent=4))
|
||||
|
||||
21
update-elected.sh
Executable file
21
update-elected.sh
Executable file
@@ -0,0 +1,21 @@
|
||||
#!/bin/bash

# Collect every council's candidates CSV and pass the list to
# results/gen-elected.php.
#
# Run from the directory that makes both relative paths below resolve
# (DATA_PATH and results/gen-elected.php).
# Requires the php command; this script itself does not invoke jq or wp.

# The folder containing data for each council.
# Includes the list of candidates and any media.
DATA_PATH="../spl-data"

# Iterate over folders in data path
candidates_files=()
for folder in "$DATA_PATH"/*; do
  if [[ -f "$folder/candidates-generic.csv" ]]; then
    candidates_files+=("$folder/candidates-generic.csv")
  fi
  # Community groups get priority: gen-elected.php lets a later file replace
  # an earlier entry with the same candidate name, so candidates.csv goes last.
  if [[ -f "$folder/candidates.csv" ]]; then
    candidates_files+=("$folder/candidates.csv")
  fi
done

if (( ${#candidates_files[@]} == 0 )); then
  printf 'Error: no candidates files found under %s\n' "$DATA_PATH" >&2
  exit 1
fi

# The list travels as ONE space-separated argument, which the PHP side splits
# on spaces, so a path containing a space would be cut in two. Fail loudly.
for candidates_file in "${candidates_files[@]}"; do
  if [[ "$candidates_file" == *" "* ]]; then
    printf 'Error: path contains a space: %s\n' "$candidates_file" >&2
    exit 1
  fi
done

php results/gen-elected.php --candidates-files "${candidates_files[*]}"
|
||||
Reference in New Issue
Block a user