diff --git a/csv-generic/gen-generic.php b/csv-generic/gen-generic.php
new file mode 100644
index 0000000..e492dcd
--- /dev/null
+++ b/csv-generic/gen-generic.php
@@ -0,0 +1,128 @@
+            $value) {
+                /* Override key name for questions */
+                if ($value === "Verified") {
+                    $is_question = false;
+                }
+
+                if (strstr($value, "candidate photo")) $value = "Photo";
+                if (strstr($value, "In which Local Government Area")) $value = "LGA";
+                if (strstr($value, "In which Ward")) $value = "Ward";
+                if (strstr($value, "Political Party")) $value = "Party";
+
+                if ($value === "Pledge") {
+                    if (strstr($data[$key], "I pledge")) $data[$key] = "y";
+                    else $data[$key] = "n";
+                }
+
+                if ($value === "Photo") {
+                    $candidate['photo_url'] = $data[$key];
+                    $data[$key] = preg_filter("/.*id=/", "", $data[$key]);
+                }
+
+                if ($is_question) {
+                    $candidate['q'.$question_no++] = $data[$key];
+                } else {
+                    $candidate[$value] = $data[$key];
+                }
+
+                if ($value === "Pledge") {
+                    $is_question = true;
+                }
+            }
+            $candidate_data[] = $candidate;
+        }
+        fclose($handle);
+    } else {
+        error_log('Error opening candidates file');
+        exit(1);
+    }
+
+    return $candidate_data;
+}
+
+/**
+ * Match each candidate's free-text LGA and ward against the known LGA list.
+ *
+ * Two keys are added to every candidate, in place:
+ *   - 'match_lga':  slug of the best-scoring LGA, or null if none scores above 0
+ *   - 'match_ward': best-scoring ward name within that LGA, or null if none
+ *
+ * @param array $candidate_data Candidate rows; 'LGA' and 'Ward' hold the typed text.
+ * @param array $lga_list       Known LGAs; each entry is read for 'slug' and 'wardNames'.
+ */
+function match_lga(&$candidate_data, $lga_list) {
+    foreach ($candidate_data as &$candidate) {
+        /* Start from "no match" for every candidate: without this reset a
+           failed match silently reused the previous candidate's LGA/ward
+           (or read undefined variables for the first candidate). */
+        $match_lga = null;
+        $match_ward = null;
+
+        /* Match user typed LGA/Ward to our database */
+        /* The typed words do not depend on the LGA, so split them once.
+           Empty tokens (doubled or trailing separators) are dropped so they
+           do not cost 10 points against every slug word. */
+        $aa = preg_split("/[^a-z]/", strtolower($candidate['LGA'] ?? ''), -1, PREG_SPLIT_NO_EMPTY);
+
+        $max_score = 0;
+        foreach ($lga_list as $lga) {
+            $bb = preg_split("/[^a-z]/", $lga['slug'], -1, PREG_SPLIT_NO_EMPTY);
+
+            /* Word-by-word similarity: close pairs (> 70%) add their score,
+               every other pair costs 10. */
+            $score_sum = 0;
+            foreach ($aa as $a) {
+                foreach ($bb as $b) {
+                    similar_text($a, $b, $score);
+                    if ($score > 70) $score_sum += $score;
+                    else $score_sum -= 10;
+                }
+            }
+
+            if ($score_sum > $max_score) {
+                $max_score = $score_sum;
+                $match_lga = $lga;
+            }
+        }
+
+        /* Only look for a ward inside an LGA that was actually matched. */
+        if ($match_lga !== null) {
+            $max_score = 0;
+            foreach (($match_lga['wardNames'] ?? []) as $ward) {
+                similar_text($ward, $candidate['Ward'] ?? '', $score);
+                if ($score > $max_score) {
+                    $max_score = $score;
+                    $match_ward = $ward;
+                }
+            }
+        }
+
+        $candidate['match_lga'] = ($match_lga !== null) ? $match_lga['slug'] : null;
+        $candidate['match_ward'] = $match_ward;
+    }
+    /* Drop the reference left behind by the by-reference foreach. */
+    unset($candidate);
+}
diff --git a/get-generic.sh b/get-generic.sh
new file mode 100755
index 0000000..9a8686d
--- /dev/null
+++ b/get-generic.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+#
+# Fetch the latest generic-survey responses and candidate photos, resize the
+# photos into the per-LGA data folders, then generate the
+# candidates-generic.csv files.
+# All paths are relative: run this from the directory containing csv-generic/.
+
+#rclone sync --progress bikewest:spl_generic_survey_2024 $DATA_LOC/google-data
+
+GENERIC_SURVEY=../generic-survey/responses.csv
+IMAGES=../generic-survey/images
+
+DATA_PATH="../spl-data"
+
+echo "Fetching latest responses to generic survey."
+rclone -v copyto --drive-export-formats csv 'bikewest:spl_generic_survey_2024/Streets People Love council election candidate pledge and survey (Responses).csv' "$GENERIC_SURVEY"
+
+# Collect the config.json of every data folder that has one.
+config_files=()
+for folder in "$DATA_PATH"/*; do
+    if test -f "$folder"/config.json; then
+        config_files+=("$folder"/config.json)
+    fi
+done
+
+# "${config_files[*]}" joins all paths into ONE space-separated argument;
+# presumably the PHP scripts split it again - confirm before switching to [@].
+image_map=$(php csv-generic/gen-image-map.php --generic-csv "$GENERIC_SURVEY" --config-files "${config_files[*]}")
+
+# Queue an (id, destination) pair for every image not yet downloaded.
+img_list=()
+for key in $(jq -r 'keys[]' <<< "$image_map") ; do
+    if [ -f "$IMAGES/$key" ] ; then
+        continue
+    fi
+    img_list+=("$key")
+    img_list+=("$IMAGES/$key")
+done
+
+if [ ${#img_list[@]} -gt 0 ] ; then
+    echo "Downloading $((${#img_list[@]}/2)) image(s)..."
+    rclone -v backend copyid bikewest: "${img_list[@]}"
+fi
+
+for key in $(jq -r 'keys[]' <<< "$image_map") ; do
+    # identify prints one line per frame/layer; only the first is needed.
+    format=$(identify "$IMAGES/$key" | awk 'NR == 1 {print $2}')
+
+    case $format in
+        PNG ) suffix=.png ;;
+        JPEG ) suffix=.jpg ;;
+        HEIC ) suffix=.jpg ;;
+        WEBP ) suffix=.png ;;
+        *)
+            # Skip this image: carrying on would reuse the suffix left over
+            # from the previous iteration (or none at all).
+            echo "Error: Unknown image format: $IMAGES/$key" >&2
+            continue
+            ;;
+    esac
+
+    lga=$(jq -r ".[\"$key\"][\"match_lga\"]" <<< "$image_map")
+    if [ -z "$lga" ] || [ "$lga" = "null" ] ; then
+        # jq prints "null" when the entry has no match_lga value.
+        echo "Error: No matched LGA for image: $IMAGES/$key" >&2
+        continue
+    fi
+    dst="$DATA_PATH/$lga/$key$suffix"
+
+    if [ -f "$dst" ] ; then
+        continue
+    fi
+
+    echo "Resizing $dst"
+    convert "$IMAGES/$key" -resize 400x400 "$dst"
+done
+
+echo "Generating candidates-generic.csv files."
+php csv-generic/gen-generic.php --generic-csv "$GENERIC_SURVEY" --config-files "${config_files[*]}"