Results parser working for all LGAs (except Melbourne)
This commit is contained in:
@@ -1,5 +1,15 @@
|
|||||||
#!/bin/bash

# Download the VEC 2024 council election results index, then the results page
# of every council (LGA) it links to. Everything is written below ./html/.

mkdir -p html

# Without the index there is nothing to iterate over, so stop with an error
# instead of grepping a missing or partial file.
if ! wget https://www.vec.vic.gov.au/results/2024-council-election-results -O html/lga_list.html; then
    echo "Error: could not download the LGA list." >&2
    exit 1
fi

# Every matching line carries a site-relative link ending in /results.
# Reading line by line avoids changing IFS for the rest of the script and
# avoids word-splitting an unquoted variable.
grep 'href="/voting/.*/results"' html/lga_list.html | while IFS= read -r line; do
    # Keep only the link target and make it absolute.
    url=$(sed -e 's|.*href="|https://www.vec.vic.gov.au|' -e 's|">.*||' <<< "$line")
    # The path component between "elections/" and "/results" names the output file.
    file=$(sed -e 's|.*elections/||' -e 's|/results||' <<< "$url")
    wget "$url" -O "html/$file"
done
|
||||||
|
|||||||
158
results/gen-elected.php
Normal file
158
results/gen-elected.php
Normal file
@@ -0,0 +1,158 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
//require_once("parse_generic_csv.php");
|
||||||
|
|
||||||
|
/* Read the command line. --candidates-files is mandatory; its value is split
 * into individual paths further down. */
$options = getopt("", ["candidates-files:"]);

if (!isset($options['candidates-files'])) {
    error_log("Error: Missing required option '--candidates-files'.");
    exit(1);
}

$candidates_files = $options['candidates-files'];
|
||||||
|
|
||||||
|
/**
 * Turn a display string into a lookup slug.
 *
 * Surrounding whitespace is removed, every remaining space becomes a hyphen,
 * and the result is lower-cased. Runs of spaces are not collapsed.
 *
 * @param string $input Text to convert.
 * @return string The slug.
 */
function trim_sluggify($input) {
    $trimmed = trim($input);
    $hyphenated = str_replace(' ', '-', $trimmed);
    return strtolower($hyphenated);
}
|
||||||
|
|
||||||
|
/* The option value is one space-separated string. Empty entries (empty
 * argument, doubled spaces) are dropped so they do not turn into the bogus
 * path "/config.json" below. */
$candidates_files = array_values(array_filter(
    explode(" ", $candidates_files),
    function ($path) { return $path !== ''; }
));

if (count($candidates_files) === 0) {
    error_log("Error: '--candidates-files' did not contain any file.");
    exit(1);
}

/* Generate dictionary of candidates and LGAs.
 * Keys are slugs of the 'Candidate Name' column; a later file overwrites an
 * earlier entry that has the same slug. */
$candidate_data = [];
foreach ($candidates_files as $file) {
    /* Each candidates file sits next to the config.json of its council. */
    $config_file = dirname($file)."/config.json";
    $config_string = file_get_contents($config_file);

    if ($config_string === FALSE) {
        error_log("Error opening config.json.");
        exit(1);
    }

    $config = json_decode($config_string, true);
    if (!is_array($config)) {
        error_log("Error parsing ".$config_file.": ".json_last_error_msg());
        exit(1);
    }

    if (($handle = fopen($file, "r")) === FALSE) {
        /* Keep going with the remaining files, but say which one was lost. */
        error_log("Error opening candidates file ".$file.".");
        continue;
    }

    $headers = fgetcsv($handle);
    if (is_array($headers)) {
        while (($data = fgetcsv($handle)) !== FALSE) {
            /* fgetcsv() reports a blank line as [null]; that is not a candidate. */
            if ($data === [null]) continue;

            $candidate = [];
            foreach ($headers as $key => $value) {
                /* Rows shorter than the header get null for the missing cells. */
                $candidate[$value] = isset($data[$key]) ? $data[$key] : null;
            }
            $candidate['Council'] = isset($config['councilName']) ? $config['councilName'] : null;

            $name = isset($candidate['Candidate Name']) ? $candidate['Candidate Name'] : '';
            $name_slug = trim_sluggify($name);
            $candidate_data[$name_slug] = $candidate;
        }
    }
    fclose($handle);
}

print_r($candidate_data);
|
||||||
|
|
||||||
|
/* Get list of elected candidates */
|
||||||
|
|
||||||
|
//$lga_list = [];
|
||||||
|
/* Generate dictionary of LGAs and Wards */
|
||||||
|
//foreach ($config_files as $config_file) {
|
||||||
|
// $config_string = file_get_contents($config_file);
|
||||||
|
// if ($config_string !== FALSE) {
|
||||||
|
// $config = json_decode($config_string, true);
|
||||||
|
// } else {
|
||||||
|
// error_log("Error opening config.json.");
|
||||||
|
// exit(1);
|
||||||
|
// }
|
||||||
|
// $config['config-file'] = $config_file;
|
||||||
|
// $lga_list[] = $config;
|
||||||
|
//}
|
||||||
|
|
||||||
|
/* Match user typed LGA/Ward to our database */
|
||||||
|
//match_lga($candidate_data, $lga_list);
|
||||||
|
|
||||||
|
$header = ["Ward", "Candidate Name", "Rating", "Pledge", "Picture"];
|
||||||
|
|
||||||
|
/* Generate candidates-generic.csv */
|
||||||
|
//foreach ($lga_list as $lga) {
|
||||||
|
// $lga_candidates = array_filter($candidate_data, function ($candidate) use ($lga) {
|
||||||
|
// return $candidate['match_lga'] === $lga['slug'];
|
||||||
|
// });
|
||||||
|
//
|
||||||
|
// if (count($lga_candidates) === 0) continue;
|
||||||
|
//
|
||||||
|
// remove_duplicates($lga_candidates);
|
||||||
|
//
|
||||||
|
// $dir = dirname($lga['config-file']);
|
||||||
|
// $dir_files = scandir($dir);
|
||||||
|
// $output_file = $dir."/candidates-generic.csv";
|
||||||
|
// $override_file = $dir."/candidates-override.csv";
|
||||||
|
//
|
||||||
|
// if (($handle = fopen($output_file, "w")) === FALSE) {
|
||||||
|
// error_log('Error opening output file');
|
||||||
|
// exit(1);
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// if (fputcsv($handle, $header) === FALSE) {
|
||||||
|
// error_log('Error writing headers to output file');
|
||||||
|
// exit(3);
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// $lines = [];
|
||||||
|
// foreach ($lga_candidates as $candidate) {
|
||||||
|
// /* Add extension to photo hash */
|
||||||
|
// if (strlen($candidate['Photo'])) {
|
||||||
|
// foreach ($dir_files as $file) {
|
||||||
|
// if (preg_match("/\.json$/", $file)) continue;
|
||||||
|
// if (strstr($file, $candidate['Photo'])) {
|
||||||
|
// $candidate['Photo'] = $file;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// $lines[] = [
|
||||||
|
// $candidate['match_ward'],
|
||||||
|
// $candidate['Name'],
|
||||||
|
// $candidate['Score'],
|
||||||
|
// $candidate['Pledge'],
|
||||||
|
// $candidate['Photo'],
|
||||||
|
// ];
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// /* Apply overrides if they exist */
|
||||||
|
// $overrides = [];
|
||||||
|
// if (file_exists($override_file)) {
|
||||||
|
// if (($ovr_handle = fopen($override_file, "r")) !== FALSE) {
|
||||||
|
// $headers = fgetcsv($ovr_handle);
|
||||||
|
// while (($data = fgetcsv($ovr_handle)) !== FALSE) {
|
||||||
|
// $override = [];
|
||||||
|
// foreach ($headers as $key => $value) {
|
||||||
|
// $override[$value] = $data[$key];
|
||||||
|
// }
|
||||||
|
// $overrides[] = $override;
|
||||||
|
// }
|
||||||
|
// fclose($ovr_handle);
|
||||||
|
// } else {
|
||||||
|
// error_log('Error opening overrides file');
|
||||||
|
// exit(3);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// foreach ($overrides as $override) {
|
||||||
|
// foreach ($lines as $line_key => $line) {
|
||||||
|
// $match_index = array_search($override['Match Field'], $header);
|
||||||
|
// $replace_index = array_search($override['Replace Field'], $header);
|
||||||
|
// if ($line[$match_index] === $override['Match Value']) {
|
||||||
|
// if ($replace_index !== false)
|
||||||
|
// $lines[$line_key][$replace_index] = $override['Replace Value'];
|
||||||
|
// else /* If 'Replace Field' is not matched - delete this entry */
|
||||||
|
// $lines[$line_key]['Delete'] = 'y';
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// foreach ($lines as $line) {
|
||||||
|
// if (isset($line['Delete'])) continue;
|
||||||
|
// if (fputcsv($handle, $line) === FALSE) {
|
||||||
|
// error_log('Error writing candidate to output file');
|
||||||
|
// exit(3);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// fclose($handle);
|
||||||
|
//}
|
||||||
|
|
||||||
|
exit(0);
|
||||||
@@ -1,24 +1,53 @@
|
|||||||
from bs4 import BeautifulSoup, Tag as HTMLTag
|
from bs4 import BeautifulSoup, Tag as HTMLTag
|
||||||
|
import json, re, argparse
|
||||||
|
|
||||||
with open("boroondara-city-council", 'r') as results_fp:
|
parser = argparse.ArgumentParser()
|
||||||
html_doc = results_fp.read()
|
parser.add_argument('filenames', nargs='*')
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
soup = BeautifulSoup(html_doc, 'html.parser')
|
def get_vacancies(ward):
    """Read a ward's name and vacancy count from the heading above its results.

    The heading is taken from ``ward.parent.parent.h2.text``. Everything before
    the first "(" is the ward name; the digits directly after a "(" are the
    number of vacancies.

    Args:
        ward: node whose grandparent contains the ward's ``h2`` heading.

    Returns:
        Tuple ``(ward_name, vacancies, ward)``; ``ward`` is passed through
        unchanged so later steps can navigate from it.

    Raises:
        ValueError: if the heading contains no "(<digits>" group.
    """
    text = ward.parent.parent.h2.text
    # Raw string: "\(" in a normal literal is an invalid escape sequence.
    count = re.search(r"\(([0-9]+)", text)
    if count is None:
        raise ValueError(f"no vacancy count found in ward heading: {text!r}")
    ward_name = text.split("(", 1)[0].strip()
    return (ward_name, int(count.group(1)), ward)
|
||||||
|
|
||||||
def get_candidate_name(candidate):
|
def get_candidate_names(ward_desc):
|
||||||
for sibling in candidate.parent.next_siblings:
|
names = []
|
||||||
|
for sibling in ward_desc[2].parent.next_siblings:
|
||||||
if not isinstance(sibling, HTMLTag):
|
if not isinstance(sibling, HTMLTag):
|
||||||
continue
|
continue
|
||||||
if not (block := sibling.find('td', class_="list-item-body")):
|
if not (blocks := sibling.find_all('td', class_="list-item-body")):
|
||||||
continue
|
continue
|
||||||
return block.text.strip()
|
for block in blocks:
|
||||||
|
names.append(re.sub('\n.*', '', block.text.strip()))
|
||||||
|
return names
|
||||||
|
|
||||||
names = []
|
def parse_lga(filename):
    """Extract the elected candidates of every ward from one saved results page.

    Args:
        filename: path of the HTML file to parse.

    Returns:
        Dict mapping each ward name (from get_vacancies) to the list of
        candidate names returned by get_candidate_names for that ward.

    Raises:
        AssertionError: if the number of names found for a ward differs from
            the vacancy count in its heading. Raised explicitly so the check
            still runs under ``python -O`` and reports which ward failed.
    """
    with open(filename, 'r') as results_fp:
        html_doc = results_fp.read()

    soup = BeautifulSoup(html_doc, 'html.parser')

    # The list of winners is introduced by one of two labels. All matches of
    # the first label are handled before any match of the second.
    ward_info = []
    for label in ("Successful candidates", "Elected candidates"):
        for ward in soup.find_all(string=label):
            ward_info.append(get_vacancies(ward))

    results = {}
    for ward_desc in ward_info:
        ward_name, vacancies = ward_desc[0], ward_desc[1]
        names = get_candidate_names(ward_desc)
        if len(names) != vacancies:
            raise AssertionError(
                f"{filename}: ward {ward_name!r} has {vacancies} vacancies "
                f"but {len(names)} candidate names were found"
            )
        results[ward_name] = names

    return results
|
||||||
|
|
||||||
|
# Parse every results page named on the command line and print one JSON
# document keyed by file name with the 'html/lgas/' prefix removed.
all_results = {}
for page_path in args.filenames:
    all_results[re.sub('html/lgas/', '', page_path)] = parse_lga(page_path)

print(json.dumps(all_results, indent=4))
|
||||||
|
|||||||
21
update-elected.sh
Executable file
21
update-elected.sh
Executable file
@@ -0,0 +1,21 @@
|
|||||||
|
#!/bin/bash

# Collects the candidate CSV files of every council under DATA_PATH and passes
# them to results/gen-elected.php.
# This script uses the php command, make sure it is installed before running this script.

# The folder containing data for each council.
# Includes the list of candidates and any media.
DATA_PATH="../spl-data"

# Iterate over folders in data path
candidates_files=()
for folder in "$DATA_PATH"/*; do
    if test -f "$folder"/candidates-generic.csv; then
        candidates_files+=("$folder"/candidates-generic.csv)
    fi
    # Community groups get priority: candidates.csv is listed after the
    # generic file, and gen-elected.php lets a later file overwrite an earlier
    # entry with the same candidate slug.
    if test -f "$folder"/candidates.csv; then
        candidates_files+=("$folder"/candidates.csv)
    fi
done

# An empty list would reach PHP as an empty argument; fail here with a clear message.
if [ "${#candidates_files[@]}" -eq 0 ]; then
    echo "Error: no candidates files found under $DATA_PATH." >&2
    exit 1
fi

# The list is passed as ONE space-joined argument because gen-elected.php
# splits it on spaces; paths containing spaces are therefore not supported.
php results/gen-elected.php --candidates-files "${candidates_files[*]}"
|
||||||
Reference in New Issue
Block a user