From 2453d550ce84dbdbe6799fd4d6b1bb2f845ab6a9 Mon Sep 17 00:00:00 2001 From: Kim Taylor Date: Fri, 20 Sep 2024 10:19:57 +1000 Subject: [PATCH 1/6] Fetch generic data from google. --- get_generic.sh | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100755 get_generic.sh diff --git a/get_generic.sh b/get_generic.sh new file mode 100755 index 0000000..f3c5c97 --- /dev/null +++ b/get_generic.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +DATA_LOC=../generic-survey + +rclone sync --progress bikewest:spl_generic_survey_2024 $DATA_LOC/google-data + +rclone --drive-export-formats csv copyto 'bikewest:spl_generic_survey_2024/Streets People Love council election candidate pledge and survey (Responses).csv' $DATA_LOC/responses.csv From e2fbd1b1ef31c318fe0c3faa0bfa9e090c40162c Mon Sep 17 00:00:00 2001 From: Kim Taylor Date: Sat, 21 Sep 2024 19:49:49 +1000 Subject: [PATCH 2/6] Calculate scores based on Faith's criteria. --- csv-generic/gen-generic.php | 31 +++++++++++++++++++++ csv-generic/parse_generic_csv.php | 46 +++++++++++++++++++++++++++++++ get-generic.sh | 12 ++++++++ get_generic.sh | 7 ----- 4 files changed, 89 insertions(+), 7 deletions(-) create mode 100644 csv-generic/gen-generic.php create mode 100644 csv-generic/parse_generic_csv.php create mode 100755 get-generic.sh delete mode 100755 get_generic.sh diff --git a/csv-generic/gen-generic.php b/csv-generic/gen-generic.php new file mode 100644 index 0000000..4f1e59d --- /dev/null +++ b/csv-generic/gen-generic.php @@ -0,0 +1,31 @@ + $value) { + /* Override key name for questions */ + if ($value === "Verified") { + $is_question = false; + } + + if (strstr($value, "candidate photo")) $value = "Photo"; + if (strstr($value, "In which Local Government Area")) $value = "LGA"; + if (strstr($value, "In which Ward")) $value = "Ward"; + if (strstr($value, "Political Party")) $value = "Party"; + + if ($value === "Pledge") { + if (strstr($data[$key], "I pledge")) $data[$key] = "Yes"; + else $data[$key] = "No"; + } + + if ($is_question) { + $candidate['q'.$question_no++] = $data[$key]; + } else { + $candidate[$value] = $data[$key]; + } + + if ($value === "Pledge") { + $is_question = true; + } + } + $candidate_data[] = $candidate; + } + fclose($handle); + } else { + error_log('Error opening candidates file'); + exit(1); + } + + return $candidate_data; +} diff --git a/get-generic.sh b/get-generic.sh new file mode 100755 index 0000000..9f76f07 --- /dev/null +++ b/get-generic.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +DATA_LOC=../generic-survey + +#rclone sync --progress bikewest:spl_generic_survey_2024 $DATA_LOC/google-data + +#rclone --drive-export-formats csv copyto 'bikewest:spl_generic_survey_2024/Streets People Love council election candidate pledge and survey (Responses).csv' $DATA_LOC/responses.csv + + +#content=$(php pledge-update/pledge-page.php --candidates-files "${candidates_files[*]}") + +php csv-generic/gen-generic.php --generic-csv $DATA_LOC/responses.csv diff --git a/get_generic.sh b/get_generic.sh deleted file mode 100755 index f3c5c97..0000000 --- a/get_generic.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -DATA_LOC=../generic-survey - -rclone sync --progress bikewest:spl_generic_survey_2024 $DATA_LOC/google-data - -rclone --drive-export-formats csv copyto 'bikewest:spl_generic_survey_2024/Streets People Love council election candidate pledge and survey (Responses).csv' $DATA_LOC/responses.csv From 219b242503cf89027734ead7ca9d740bdff39063 Mon Sep 17 00:00:00 2001 From: Kim Taylor Date: Sun, 22 Sep 2024 19:46:39 +1000 Subject: [PATCH 3/6] Generate candidates-generic.csv files based on fuzzy match with LGA/Ward. --- csv-generic/csv-to-json.php | 20 +++++++ csv-generic/gen-generic.php | 101 +++++++++++++++++++++++++++++++++++- get-generic.sh | 16 ++++-- 3 files changed, 131 insertions(+), 6 deletions(-) create mode 100644 csv-generic/csv-to-json.php diff --git a/csv-generic/csv-to-json.php b/csv-generic/csv-to-json.php new file mode 100644 index 0000000..9bef38b --- /dev/null +++ b/csv-generic/csv-to-json.php @@ -0,0 +1,20 @@ + 70) $score_sum += $score; + else $score_sum -= 10; + } + } + + if ($score_sum > $max_score) { + $max_score = $score_sum; + $match_lga = $lga; + } + } + + $max_score = 0; + foreach ($match_lga['wardNames'] as $ward) { + similar_text($ward, $candidate['Ward'], $score); + if ($score > $max_score) { + $max_score = $score; + $match_ward = $ward; + } + } + + $candidate['match_lga'] = $match_lga['slug']; + $candidate['match_ward'] = $match_ward; +} + +/* Get picture */ +foreach ($candidate_data as &$candidate) { + $candidate['match_picture'] = ""; +} + +$header = ["Ward", "Candidate Name", "Rating", "Picture"]; + +/* Generate candidates-generic.csv */ +foreach ($lga_list as $lga) { + $lga_candidates = array_filter($candidate_data, function ($candidate) use ($lga) { + return $candidate['match_lga'] === $lga['slug']; + }); + + if (count($lga_candidates) === 0) continue; + + $output_file = dirname($lga['config-file'])."/candidates-generic.csv"; + + if (($handle = fopen($output_file, "w")) === FALSE) { + error_log('Error opening output file'); + exit(1); + } + + if (fputcsv($handle, $header) === FALSE) { + error_log('Error writing headers to output file'); + exit(3); + } + + foreach ($lga_candidates as $candidate) { + $fields = [ + $candidate['match_ward'], + $candidate['Name'], + $candidate['Score'], + $candidate['match_picture'], + ]; + + if (fputcsv($handle, $fields) === FALSE) { + error_log('Error writing candidate to output file'); + exit(3); + } + } +} exit(0); diff --git a/get-generic.sh b/get-generic.sh index 9f76f07..dd1060a 100755 --- a/get-generic.sh +++ b/get-generic.sh @@ -1,12 +1,20 @@ #!/bin/bash -DATA_LOC=../generic-survey - #rclone sync --progress bikewest:spl_generic_survey_2024 $DATA_LOC/google-data #rclone --drive-export-formats csv copyto 'bikewest:spl_generic_survey_2024/Streets People Love council election candidate pledge and survey (Responses).csv' $DATA_LOC/responses.csv +GENERIC_SURVEY=../generic-survey/responses.csv -#content=$(php pledge-update/pledge-page.php --candidates-files "${candidates_files[*]}") +DATA_PATH="../spl-data" -php csv-generic/gen-generic.php --generic-csv $DATA_LOC/responses.csv +config_files=() +for folder in "$DATA_PATH"/*; do + if test -f "$folder"/config.json; then + config_files+=("$folder"/config.json) + fi +done + +#php csv-generic/csv-to-json.php --generic-csv $DATA_LOC/responses.csv + +php csv-generic/gen-generic.php --generic-csv $GENERIC_SURVEY --config-files "${config_files[*]}" From f9c151bfaef95ff4ce19d4054bdd9105d00d71bd Mon Sep 17 00:00:00 2001 From: Kim Taylor Date: Sun, 22 Sep 2024 22:57:35 +1000 Subject: [PATCH 4/6] Start generating image download script. --- csv-generic/csv-to-json.php | 20 ------------ csv-generic/gen-generic.php | 45 +++----------------------- csv-generic/gen-image-map.php | 54 +++++++++++++++++++++++++++++++ csv-generic/parse_generic_csv.php | 42 ++++++++++++++++++++++++ get-generic.sh | 13 ++++++-- 5 files changed, 111 insertions(+), 63 deletions(-) delete mode 100644 csv-generic/csv-to-json.php create mode 100644 csv-generic/gen-image-map.php diff --git a/csv-generic/csv-to-json.php b/csv-generic/csv-to-json.php deleted file mode 100644 index 9bef38b..0000000 --- a/csv-generic/csv-to-json.php +++ /dev/null @@ -1,20 +0,0 @@ - 70) $score_sum += $score; - else $score_sum -= 10; - } - } - - if ($score_sum > $max_score) { - $max_score = $score_sum; - $match_lga = $lga; - } - } - - $max_score = 0; - foreach ($match_lga['wardNames'] as $ward) { - similar_text($ward, $candidate['Ward'], $score); - if ($score > $max_score) { - $max_score = $score; - $match_ward = $ward; - } - } - - $candidate['match_lga'] = $match_lga['slug']; - $candidate['match_ward'] = $match_ward; -} - -/* Get picture */ -foreach ($candidate_data as &$candidate) { - $candidate['match_picture'] = ""; -} - $header = ["Ward", "Candidate Name", "Rating", "Picture"]; /* Generate candidates-generic.csv */ @@ -115,7 +78,7 @@ foreach ($lga_list as $lga) { $candidate['match_ward'], $candidate['Name'], $candidate['Score'], - $candidate['match_picture'], + $candidate['Photo'], ]; if (fputcsv($handle, $fields) === FALSE) { diff --git a/csv-generic/gen-image-map.php b/csv-generic/gen-image-map.php new file mode 100644 index 0000000..3c94c1b --- /dev/null +++ b/csv-generic/gen-image-map.php @@ -0,0 +1,54 @@ + 70) $score_sum += $score; + else $score_sum -= 10; + } + } + + if ($score_sum > $max_score) { + $max_score = $score_sum; + $match_lga = $lga; + } + } + + $max_score = 0; + foreach ($match_lga['wardNames'] as $ward) { + similar_text($ward, $candidate['Ward'], $score); + if ($score > $max_score) { + $max_score = $score; + $match_ward = $ward; + } + } + + $candidate['match_lga'] = $match_lga['slug']; + $candidate['match_ward'] = $match_ward; + } +} diff --git a/get-generic.sh b/get-generic.sh index dd1060a..28ef8ed 100755 --- a/get-generic.sh +++ b/get-generic.sh @@ -15,6 +15,15 @@ for folder in "$DATA_PATH"/*; do fi done -#php csv-generic/csv-to-json.php --generic-csv $DATA_LOC/responses.csv -php csv-generic/gen-generic.php --generic-csv $GENERIC_SURVEY --config-files "${config_files[*]}" +image_map=$(php csv-generic/gen-image-map.php --generic-csv $GENERIC_SURVEY --config-files "${config_files[*]}") + +for key in $(jq -r 'keys[]' <<< $image_map) ; do + url=$(jq -r ".[\"$key\"][\"url\"]" <<< $image_map) + lga=$(jq -r ".[\"$key\"][\"match_lga\"]" <<< $image_map) + dst=$DATA_PATH/$lga/$key + echo wget $url -O $dst + break +done + +#php csv-generic/gen-generic.php --generic-csv $GENERIC_SURVEY --config-files "${config_files[*]}" From c27cc2831b3416e53554e4221b65435c33159ba6 Mon Sep 17 00:00:00 2001 From: Kim Taylor Date: Mon, 23 Sep 2024 23:25:12 +1000 Subject: [PATCH 5/6] Image fetch and resize working. --- csv-generic/gen-generic.php | 12 +++++++- csv-generic/parse_generic_csv.php | 4 +-- get-generic.sh | 50 +++++++++++++++++++++++++------ 3 files changed, 54 insertions(+), 12 deletions(-) diff --git a/csv-generic/gen-generic.php b/csv-generic/gen-generic.php index a0901ce..337b991 100644 --- a/csv-generic/gen-generic.php +++ b/csv-generic/gen-generic.php @@ -61,7 +61,9 @@ foreach ($lga_list as $lga) { if (count($lga_candidates) === 0) continue; - $output_file = dirname($lga['config-file'])."/candidates-generic.csv"; + $dir = dirname($lga['config-file']); + $dir_files = scandir($dir); + $output_file = $dir."/candidates-generic.csv"; if (($handle = fopen($output_file, "w")) === FALSE) { error_log('Error opening output file'); @@ -74,10 +76,18 @@ foreach ($lga_list as $lga) { } foreach ($lga_candidates as $candidate) { + /* Add extension to photo hash */ + foreach ($dir_files as $file) { + if (strstr($file, $candidate['Photo'])) { + $candidate['Photo'] = $file; + } + } + $fields = [ $candidate['match_ward'], $candidate['Name'], $candidate['Score'], + $candidate['Pledge'], $candidate['Photo'], ]; diff --git a/csv-generic/parse_generic_csv.php b/csv-generic/parse_generic_csv.php index c90ee05..9458c2d 100644 --- a/csv-generic/parse_generic_csv.php +++ b/csv-generic/parse_generic_csv.php @@ -20,8 +20,8 @@ function parse_generic_csv($generic_csv) { if (strstr($value, "Political Party")) $value = "Party"; if ($value === "Pledge") { - if (strstr($data[$key], "I pledge")) $data[$key] = "Yes"; - else $data[$key] = "No"; + if (strstr($data[$key], "I pledge")) $data[$key] = "y"; + else $data[$key] = "n"; } if ($value === "Photo") { diff --git a/get-generic.sh b/get-generic.sh index 28ef8ed..9a8686d 100755 --- a/get-generic.sh +++ b/get-generic.sh @@ -2,12 +2,14 @@ #rclone sync --progress bikewest:spl_generic_survey_2024 $DATA_LOC/google-data -#rclone --drive-export-formats csv copyto 'bikewest:spl_generic_survey_2024/Streets People Love council election candidate pledge and survey (Responses).csv' $DATA_LOC/responses.csv - GENERIC_SURVEY=../generic-survey/responses.csv +IMAGES=../generic-survey/images DATA_PATH="../spl-data" +echo "Fetching latest responses to generic survey." +rclone -v copyto --drive-export-formats csv 'bikewest:spl_generic_survey_2024/Streets People Love council election candidate pledge and survey (Responses).csv' $GENERIC_SURVEY + config_files=() for folder in "$DATA_PATH"/*; do if test -f "$folder"/config.json; then @@ -15,15 +17,45 @@ for folder in "$DATA_PATH"/*; do fi done - image_map=$(php csv-generic/gen-image-map.php --generic-csv $GENERIC_SURVEY --config-files "${config_files[*]}") +img_list=() for key in $(jq -r 'keys[]' <<< $image_map) ; do - url=$(jq -r ".[\"$key\"][\"url\"]" <<< $image_map) - lga=$(jq -r ".[\"$key\"][\"match_lga\"]" <<< $image_map) - dst=$DATA_PATH/$lga/$key - echo wget $url -O $dst - break + if [ -f $IMAGES/$key ] ; then + continue + fi + img_list+=($key) + img_list+=($IMAGES/$key) done -#php csv-generic/gen-generic.php --generic-csv $GENERIC_SURVEY --config-files "${config_files[*]}" +if [ ${#img_list[*]} -gt 0 ] ; then + echo "Downloading $((${#img_list[*]}/2)) image(s)..." + rclone -v backend copyid bikewest: ${img_list[*]} +fi + +for key in $(jq -r 'keys[]' <<< $image_map) ; do + format=$(identify $IMAGES/$key | awk '{print $2}') + + case $format in + PNG ) suffix=.png ;; + JPEG ) suffix=.jpg ;; + HEIC ) suffix=.jpg ;; + WEBP ) suffix=.png ;; + *) + echo "Error: Unknown image format: $IMAGES/$key" + ;; + esac + + lga=$(jq -r ".[\"$key\"][\"match_lga\"]" <<< $image_map) + dst="$DATA_PATH/$lga/$key$suffix" + + if [ -f $dst ] ; then + continue + fi + + echo "Resizing $dst" + convert $IMAGES/$key -resize 400x400 $dst +done + +echo "Generating candidates-generic.csv files." +php csv-generic/gen-generic.php --generic-csv $GENERIC_SURVEY --config-files "${config_files[*]}" From 6df6cc2d5e37a19831676dfcbf50682404f3bf80 Mon Sep 17 00:00:00 2001 From: Kim Taylor Date: Mon, 23 Sep 2024 23:44:36 +1000 Subject: [PATCH 6/6] Missing pledge in CSV header. --- csv-generic/gen-generic.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csv-generic/gen-generic.php b/csv-generic/gen-generic.php index 337b991..e492dcd 100644 --- a/csv-generic/gen-generic.php +++ b/csv-generic/gen-generic.php @@ -51,7 +51,7 @@ foreach ($candidate_data as &$candidate) { $candidate['Score'] = $score; } -$header = ["Ward", "Candidate Name", "Rating", "Picture"]; +$header = ["Ward", "Candidate Name", "Rating", "Pledge", "Picture"]; /* Generate candidates-generic.csv */ foreach ($lga_list as $lga) {