diff --git a/results/fetch.sh b/results/fetch.sh
new file mode 100755
index 0000000..45a9fa8
--- /dev/null
+++ b/results/fetch.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+#
+# Download the VEC 2024 council election results index page, then every
+# page it links to that matches /voting/.../results, saving them under html/.
+
+mkdir -p html || exit 1
+wget https://www.vec.vic.gov.au/results/2024-council-election-results -O html/lga_list.html || exit 1
+
+# Split the grep output on newlines only, and turn globbing off so that
+# characters such as * or ? in the matched HTML lines are never expanded.
+IFS=$'\n'
+set -f
+
+lgas=$(grep 'href="/voting/.*/results"' html/lga_list.html)
+
+for lga in $lgas ; do
+  # Turn the matched line into an absolute URL: prefix the host, drop the tail.
+  lga=$(sed 's|.*href="|https://www.vec.vic.gov.au|' <<< "$lga")
+  lga=$(sed 's|">.*||' <<< "$lga")
+  # Output file name: the URL with everything up to "elections/" and "/results" removed.
+  file=$(sed 's|.*elections/||' <<< "$lga" | sed 's|/results||')
+  wget "$lga" -O "html/$file"
+done
diff --git a/results/gen-elected.php b/results/gen-elected.php
new file mode 100644
index 0000000..1f0d158
--- /dev/null
+++ b/results/gen-elected.php
@@ -0,0 +1,142 @@
+ 70) $score_sum += $score;
+                else $score_sum -= 1;
+            }
+        }
+
+        if ($score_sum > $max_score) {
+            $max_score = $score_sum;
+            $best_match = $possible_match;
+        }
+    }
+    return array($max_score, $best_match);
+}
+
+$candidates_files = explode(" ", $candidates_files);
+
+/* Generate dictionary of candidates and LGAs */
+$candidate_data = [];
+foreach ($candidates_files as $file) {
+    $config_file = dirname($file)."/config.json";
+    $config_string = file_get_contents($config_file);
+
+    if ($config_string !== FALSE) {
+        $config = json_decode($config_string, true);
+    } else {
+        error_log("Error opening config.json.");
+        exit(1);
+    }
+
+    $candidate_data[$config['councilName']]['_filename'] = $file;
+
+    if (($handle = fopen($file, "r")) !== FALSE) {
+        $headers = fgetcsv($handle);
+        while (($data = fgetcsv($handle)) !== FALSE) {
+            $candidate = [];
+            foreach ($headers as $key => $value) {
+                $candidate[$value] = $data[$key];
+            }
+            $name_slug = trim_sluggify($candidate['Candidate Name']);
+            $candidate_data[$config['councilName']][$name_slug] = $candidate;
+        }
+    }
+}
+
+$vec_lga_names = [];
+foreach ($results as $lga => $data) {
+    $vec_lga_names[] = $lga;
+}
+
+function was_elected($candidate, $vec_wards) {
+    foreach ($vec_wards as $vec_candidates) {
+        list($score, $match) = match_words($candidate, $vec_candidates);
+        if ($score > 100) return true;
+    }
+    return false;
+}
+
+$header = ["Ward", "Candidate Name", "Elected"];
+
+foreach ($candidate_data as $lga => $db_candidates) {
+    /* Find LGA in results dict */
+    list($score, $vec_lga_name) = match_words($lga, $vec_lga_names);
+    $vec_wards = $results[$vec_lga_name];
+
+    $elected = [];
+    /* Go through database candidates and build list of elected candidates */
+    foreach ($db_candidates as $key => $value) {
+        if ($key === '_filename') {
+            $output_file = dirname($value)."/candidates-elected.csv";
+            continue;
+        }
+        if (was_elected($value['Candidate Name'], $vec_wards)) {
+            $elected[] = $value;
+        }
+    }
+
+    /* Don't create file if none were elected. */
+    if (count($elected) === 0) continue;
+
+    if (($handle = fopen($output_file, "w")) === FALSE) {
+        error_log('Error opening output file');
+        exit(1);
+    }
+
+    if (fputcsv($handle, $header) === FALSE) {
+        error_log('Error writing headers to output file');
+        exit(3);
+    }
+
+    foreach ($elected as $candidate) {
+        $line = array($candidate['Ward'], $candidate['Candidate Name'], "y");
+        if (fputcsv($handle, $line) === FALSE) {
+            error_log('Error writing candidate to output file');
+            exit(3);
+        }
+    }
+
+    fclose($handle);
+}
+
+exit(0);
diff --git a/results/parser.py b/results/parser.py
new file mode 100644
index 0000000..3875741
--- /dev/null
+++ b/results/parser.py
@@ -0,0 +1,65 @@
+from bs4 import BeautifulSoup, Tag as HTMLTag
+import json, re, argparse
+
+parser = argparse.ArgumentParser()
+parser.add_argument('filenames', nargs='*')
+args = parser.parse_args()
+
+def get_vacancies(ward):
+    """Return (ward name, vacancy count, ward) for a results heading node.
+
+    The title is read from the <h2> two levels above `ward`: the name is the
+    text before the first "(", the count is the digits following a "(".
+    """
+    text = ward.parent.parent.h2.text
+    # Raw strings: "\(" in a plain string literal is an invalid escape sequence.
+    ward_name = re.search(r"[^\(]*", text)[0].strip()
+    vacancies = int(re.search(r"\([0-9]+", text)[0].strip("("))
+    return (ward_name, vacancies, ward)
+
+def get_candidate_names(ward_desc):
+    """Collect the candidate names that follow the heading in `ward_desc[2]`.
+
+    Scans the tag siblings after the heading's parent and keeps the first
+    line of text of every <td class="list-item-body"> cell found in them.
+    """
+    names = []
+    for sibling in ward_desc[2].parent.next_siblings:
+        if not isinstance(sibling, HTMLTag):
+            continue
+        if not (blocks := sibling.find_all('td', class_="list-item-body")):
+            continue
+        for block in blocks:
+            names.append(re.sub('\n.*', '', block.text.strip()))
+    return names
+
+def parse_lga(filename):
+    """Parse one saved results page into a {ward name: [names]} dict."""
+    with open(filename, 'r') as results_fp:
+        html_doc = results_fp.read()
+
+    soup = BeautifulSoup(html_doc, 'html.parser')
+    # Heading text nodes for either wording; "Successful" matches come first.
+    wards0 = soup.find_all(string="Successful candidates")
+    wards1 = soup.find_all(string="Elected candidates")
+
+    ward_info = [get_vacancies(ward) for ward in [*wards0, *wards1]]
+
+    results = {}
+    for ward in ward_info:
+        names = get_candidate_names(ward)
+        assert len(names) == ward[1], \
+            f"{filename}: {ward[0]}: found {len(names)} names, expected {ward[1]}"
+        results[ward[0]] = names
+
+    return results
+
+all_results = {}
+for lga in args.filenames:
+    # NOTE(review): this strips "html/lgas/", but fetch.sh saves pages directly
+    # under html/ -- confirm which directory the pages are expected to be in.
+    lga_name = re.sub('html/lgas/', '', lga)
+    results = parse_lga(lga)
+    all_results[lga_name] = results
+
+print(json.dumps(all_results, indent=4))
diff --git a/update-elected.sh b/update-elected.sh
new file mode 100755
index 0000000..b565670
--- /dev/null
+++ b/update-elected.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+# This script uses the jq, wp, and php commands; make sure they are installed
+# before running it. (Only php is invoked directly in this file.)
+
+# The folder containing data for each council.
+# Includes the list of candidates and any media.
+DATA_PATH="../spl-data"
+
+# Iterate over folders in data path
+candidates_files=()
+for folder in "$DATA_PATH"/*; do
+  if test -f "$folder"/candidates-generic.csv; then
+    candidates_files+=("$folder"/candidates-generic.csv)
+  fi
+  # Community groups get priority
+  if test -f "$folder"/candidates.csv; then
+    candidates_files+=("$folder"/candidates.csv)
+  fi
+done
+
+# Nothing to process: stop here with a clear message instead of handing
+# gen-elected.php an empty file list.
+if (( ${#candidates_files[@]} == 0 )); then
+  printf 'No candidates files found under %s\n' "$DATA_PATH" >&2
+  exit 1
+fi
+
+# gen-elected.php explode()s its candidates-files string on spaces, so the
+# paths are passed as one space-joined argument ([*]), not one argument each.
+php results/gen-elected.php --candidates-files "${candidates_files[*]}" \
+  --results-file "$DATA_PATH/results.json"