utf8filenamecheck.c 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. /**
  2. * utf8filenamecheck Check if all the filenames in a given folder are UTF-8
  3. * Copyright (C) 2023 Johannes 'Banana' Keßler
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program. If not, see <https://www.gnu.org/licenses/>.
  17. */
  18. /**
  19. * 2023 Small linux C tool to check if paths in a folder are utf-8 formatted
  20. * Linux version
  21. */
  22. // https://linux.die.net/man/3/nftw
  23. #define _XOPEN_SOURCE 500
  24. #include <ftw.h>
  25. #include <stdio.h>
  26. #include <string.h>
  27. #include <stdlib.h>
  28. #include <argp.h>
  29. // https://github.com/simdutf/simdutf
  30. #include <simdutf8check.h>
  /**
   * Commandline arguments
   * see: https://www.gnu.org/software/libc/manual/html_node/Argp-Example-3.html#Argp-Example-3
   */

  /* Version string picked up by argp. */
  const char *argp_program_version = "1.0";

  /* Bug report location picked up by argp. */
  const char *argp_program_bug_address = "https://www.bananas-playground.net/";

  /* Program description handed to argp. */
  static char doc[] = "utf8filenamecheck. Small linux C tool to check if paths in a folder are utf-8 formatted.";

  /* Description of the single positional argument. */
  static char args_doc[] = "folder";

  /*
   * The options we understand.
   * Each entry: long name, short key, argument name, flags, help text.
   * The all-zero entry terminates the table.
   */
  static struct argp_option options[] = {
      { "verbose", 'v', 0, 0, "Produce verbose output" },
      /* Quiet mode hides the per-file OK lines; failures are still printed. */
      { "quiet",   'q', 0, 0, "Do not print OK lines, only report failures" },
      { 0 }
  };
  45. struct cmdArguments {
  46. char *args[1];
  47. int verbose, quiet;
  48. };
  49. /* Parse a single option. */
  50. static error_t
  51. parse_opt (int key, char *arg, struct argp_state *state) {
  52. struct cmdArguments *arguments = state->input;
  53. switch (key) {
  54. case 'v':
  55. arguments->verbose = 1;
  56. break;
  57. case 'q':
  58. arguments->quiet = 1;
  59. break;
  60. case ARGP_KEY_ARG:
  61. if (state->arg_num >= 1)
  62. // Too many arguments.
  63. argp_usage (state);
  64. arguments->args[state->arg_num] = arg;
  65. break;
  66. case ARGP_KEY_END:
  67. if (state->arg_num < 1)
  68. /* Not enough arguments. */
  69. argp_usage (state);
  70. break;
  71. default:
  72. return ARGP_ERR_UNKNOWN;
  73. }
  74. return 0;
  75. }
  76. static struct argp argp = { options, parse_opt, args_doc, doc };
  77. struct cmdArguments arguments;
  78. /**
  79. * the callback function for nftw
  80. * https://linux.die.net/man/3/nftw
  81. */
  82. static int nftw_callback(const char *fpath,
  83. const struct stat *sb,
  84. int tflag,
  85. struct FTW *ftwbuf) {
  86. if (strcmp(fpath, ".") == 0 || strcmp(fpath, "..") == 0) {
  87. return 0;
  88. }
  89. if(tflag == FTW_DNR) {
  90. if(!arguments.quiet) printf("Can not read %s", fpath);
  91. }
  92. bool result = validate_utf8_fast(fpath, strlen(fpath));
  93. if(result) {
  94. if(!arguments.quiet) printf("%s OK \n", fpath);
  95. } else {
  96. printf("%s FAILED \n", fpath);
  97. }
  98. // continue
  99. return 0;
  100. }
  101. /**
  102. * main routine
  103. */
  104. int main(int argc, char *argv[]) {
  105. /**
  106. * command line argument parsing and default values
  107. */
  108. arguments.verbose = 0;
  109. arguments.quiet = 0;
  110. argp_parse (&argp, argc, argv, 0, 0, &arguments);
  111. if(arguments.verbose) {
  112. printf ("Folder = %s\n"
  113. "Verbose = %s\n"
  114. "Quiet = %s\n",
  115. arguments.args[0],
  116. arguments.verbose ? "yes" : "no",
  117. arguments.quiet ? "yes" : "no"
  118. );
  119. }
  120. if (nftw(arguments.args[0], nftw_callback, 15, FTW_PHYS)== -1) {
  121. perror("Reading dir failed");
  122. exit(EXIT_FAILURE);
  123. }
  124. exit(EXIT_SUCCESS);
  125. }