/** * utf8filenamecheck Check if all the filenames in a fiven folder are UTF-8 * Copyright (C) 2023 Johannes 'Banana' Keßler * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ /** * 2023 Small linux C tool to check if paths in a folder are utf-8 formatted * Linux version */ // https://linux.die.net/man/3/nftw #define _XOPEN_SOURCE 500 #include #include #include #include #include // https://github.com/simdutf/simdutf #include /** * Commandline arguments * see: https://www.gnu.org/software/libc/manual/html_node/Argp-Example-3.html#Argp-Example-3 */ const char *argp_program_version = "1.0"; const char *argp_program_bug_address = "https://www.bananas-playground.net/"; static char doc[] = "utf8filenamecheck. Small linux C tool to check if paths in a folder are utf-8 formatted."; static char args_doc[] = "folder"; /* The options we understand. */ static struct argp_option options[] = { {"verbose",'v', 0, 0, "Produce verbose output" }, {"quiet",'q', 0, 0, "Produce verbose output" }, { 0 } }; struct cmdArguments { char *args[1]; int verbose, quiet; }; /* Parse a single option. */ static error_t parse_opt (int key, char *arg, struct argp_state *state) { struct cmdArguments *arguments = state->input; switch (key) { case 'v': arguments->verbose = 1; break; case 'q': arguments->quiet = 1; break; case ARGP_KEY_ARG: if (state->arg_num >= 1) // Too many arguments. argp_usage (state); arguments->args[state->arg_num] = arg; break; case ARGP_KEY_END: if (state->arg_num < 1) /* Not enough arguments. */ argp_usage (state); break; default: return ARGP_ERR_UNKNOWN; } return 0; } static struct argp argp = { options, parse_opt, args_doc, doc }; struct cmdArguments arguments; /** * the callback function for nftw * https://linux.die.net/man/3/nftw */ static int nftw_callback(const char *fpath, const struct stat *sb, int tflag, struct FTW *ftwbuf) { if (strcmp(fpath, ".") == 0 || strcmp(fpath, "..") == 0) { return 0; } if(tflag == FTW_DNR) { if(!arguments.quiet) printf("Can not read %s", fpath); } bool result = validate_utf8_fast(fpath, strlen(fpath)); if(result) { if(!arguments.quiet) printf("%s OK \n", fpath); } else { printf("%s FAILED \n", fpath); } // continue return 0; } /** * main routine */ int main(int argc, char *argv[]) { /** * command line argument parsing and default values */ arguments.verbose = 0; arguments.quiet = 0; argp_parse (&argp, argc, argv, 0, 0, &arguments); if(arguments.verbose) { printf ("Folder = %s\n" "Verbose = %s\n" "Quiet = %s\n", arguments.args[0], arguments.verbose ? "yes" : "no", arguments.quiet ? "yes" : "no" ); } if (nftw(arguments.args[0], nftw_callback, 15, FTW_PHYS)== -1) { perror("Reading dir failed"); exit(EXIT_FAILURE); } exit(EXIT_SUCCESS); }