Jay Taylor's notes
back to listing index
ChristopherSchultz/fast-file-count
[web search]
Original source (github.com)
Clipped on: 2019-06-10
Skip to content
| 1 | /** |
| 2 | * dircnt.c - a fast file-counting program. |
| 3 | * |
| 4 | * Written 2015-02-06 by Christopher Schultz as a programming demonstration |
| 5 | * for a StackOverflow answer: |
| 6 | * https://stackoverflow.com/questions/1427032/fast-linux-file-count-for-a-large-number-of-files/28368788#28368788 |
| 7 | * |
| 8 | * This code is licensed under the Apache License 2.0. Please read the file |
| 9 | * LICENSE for more information. |
| 10 | * |
| 11 | * Please see the README.md file for compilation and usage instructions. |
| 12 | * |
| 13 | * Thanks to FlyingCodeMonkey, Gary R. Van Sickle, and Jonathan Leffler for |
| 14 | * various suggestions and improvements to the original code. Any additional |
| 15 | * contributors can be found by looking at the GitHub revision history from |
| 16 | * this point forward. |
| 17 | */ |
| 18 | #include <stdio.h> |
| 19 | #include <dirent.h> |
| 20 | #include <string.h> |
| 21 | #include <stdlib.h> |
| 22 | #include <limits.h> |
| 23 | #include <sys/stat.h> |
| 24 | |
/* Character that joins a directory name to an entry name on this platform. */
#if defined(_WIN32) || defined(WIN32)
#  define PATH_SEPARATOR '\\'
#else
#  define PATH_SEPARATOR '/'
#endif

/* Process exit status used when a counter cannot be incremented any further. */
#define EXIT_REACHED_LIMIT 0x01
| 32 | |
/*
 * Running totals accumulated while walking a directory tree.
 * Directories and non-directories are tallied separately.
 */
struct filecount {
    unsigned long dirs;  /* directories seen, excluding "." and ".." */
    unsigned long files; /* every entry that is not a directory */
};
| 38 | |
| 39 | /* |
| 40 | * counts the number of files and directories in the specified directory. |
| 41 | * |
| 42 | * path - relative pathname of a directory whose files should be counted |
| 43 | * counts - pointer to struct containing file/dir counts |
| 44 | */ |
| 45 | void count(char *path, struct filecount *counts) { |
| 46 | DIR *dir; /* dir structure we are reading */ |
| 47 | struct dirent *ent; /* directory entry currently being processed */ |
| 48 | char subpath[PATH_MAX]; /* buffer for building complete subdir and file names */ |
| 49 | struct stat statbuf; /* buffer for stat() info. A call to lstat() might be |
| 50 | required even if _DIRENT_HAVE_D_TYPE is true |
| 51 | because ent->d_type might be DT_UNKNOWN */ |
| 52 | int isdir; /* flag for a directory entry being a directory */ |
| 53 | |
| 54 | #ifdef DEBUG |
| 55 | fprintf(stderr, "Opening dir %s\n", path); |
| 56 | #endif |
| 57 | dir = opendir(path); |
| 58 | |
| 59 | /* opendir failed... file likely doesn't exist or isn't a directory */ |
| 60 | if(NULL == dir) { |
| 61 | perror(path); |
| 62 | return; |
| 63 | } |
| 64 | |
| 65 | while((ent = readdir(dir))) { |
| 66 | if (strlen(path) + 1 + strlen(ent->d_name) > PATH_MAX) { |
| 67 | fprintf(stdout, "path too long (%ld) %s%c%s", (strlen(path) + 1 + strlen(ent->d_name)), path, PATH_SEPARATOR, ent->d_name); |
| 68 | return; |
| 69 | } |
| 70 | |
| 71 | isdir = 0; /* reset isdir flag */ |
| 72 | #ifdef DEBUG |
| 73 | fprintf(stderr, "Considering %s%c%s\n", path, PATH_SEPARATOR, ent->d_name); |
| 74 | #endif /* DEBUG */ |
| 75 | |
| 76 | /* Use dirent.d_type if present, otherwise use stat() */ |
| 77 | #if ( defined ( _DIRENT_HAVE_D_TYPE ) && !PREFER_STAT) |
| 78 | if(DT_UNKNOWN == ent->d_type) { |
| 79 | /* Must perform lstat() anyway */ |
| 80 | #ifdef DEBUG |
| 81 | fprintf(stderr, "Dirent type is DT_UNKNOWN, must perform lstat()\n"); |
| 82 | #endif /* DEBUG */ |
| 83 | sprintf(subpath, "%s%c%s", path, PATH_SEPARATOR, ent->d_name); |
| 84 | if(lstat(subpath, &statbuf)) { |
| 85 | perror(subpath); |
| 86 | return; |
| 87 | } |
| 88 | if(S_ISDIR(statbuf.st_mode)) { |
| 89 | #ifdef DEBUG |
| 90 | fprintf(stderr, "Determined %s is a directory via lstat(1)\n", subpath); |
| 91 | #endif /* DEBUG */ |
| 92 | isdir = 1; |
| 93 | } |
| 94 | } else if(DT_DIR == ent->d_type) { |
| 95 | #ifdef DEBUG |
| 96 | fprintf(stderr, "Determined %s%c%s is a directory via dirent\n", path, PATH_SEPARATOR, ent->d_name); |
| 97 | #endif /* DEBUG */ |
| 98 | isdir = 1; |
| 99 | } |
| 100 | #else |
| 101 | sprintf(subpath, "%s%c%s", path, PATH_SEPARATOR, ent->d_name); |
| 102 | if(lstat(subpath, &statbuf)) { |
| 103 | perror(subpath); |
| 104 | return; |
| 105 | } |
| 106 | |
| 107 | if(S_ISDIR(statbuf.st_mode)) { |
| 108 | #ifdef DEBUG |
| 109 | fprintf(stderr, "S_ISDIR=%d, mode bits=%x\n", S_ISDIR(statbuf.st_mode), statbuf.st_mode); |
| 110 | fprintf(stderr, "Determined %s is a directory via lstat(2)\n", subpath); |
| 111 | #endif /* DEBUG */ |
| 112 | isdir = 1; |
| 113 | } |
| 114 | #endif /* if defined _DIRENT_HAVE_D_TYPE, etc. */ |
| 115 | |
| 116 | #ifdef DEBUG |
| 117 | fprintf(stderr, "name=%s, isdir=%d\n", ent->d_name, isdir); |
| 118 | #endif |
| 119 | |
| 120 | if(isdir) { |
| 121 | /* Skip "." and ".." directory entries... they are not "real" directories */ |
| 122 | if(0 == strcmp("..", ent->d_name) || 0 == strcmp(".", ent->d_name)) { |
| 123 | /* fprintf(stderr, "This is %s, skipping\n", ent->d_name); */ |
| 124 | } else { |
| 125 | if(ULONG_MAX == counts->dirs) { |
| 126 | fprintf(stderr, "Reached maximum number of directories to count (%lu) after %lu files\n", counts->dirs, counts->files); |
| 127 | exit(EXIT_REACHED_LIMIT); |
| 128 | } |
| 129 | sprintf(subpath, "%s%c%s", path, PATH_SEPARATOR, ent->d_name); |
| 130 | counts->dirs++; |
| 131 | count(subpath, counts); |
| 132 | } |
| 133 | } else { |
| 134 | if(ULONG_MAX == counts->files) { |
| 135 | fprintf(stderr, "Reached maximum number of files to count (%lu) after %lu directories\n", counts->files, counts->dirs); |
| 136 | exit(EXIT_REACHED_LIMIT); |
| 137 | } |
| 138 | |
| 139 | counts->files++; |
| 140 | } |
| 141 | } |
| 142 | |
| 143 | #ifdef DEBUG |
| 144 | fprintf(stderr, "Closing dir %s\n", path); |
| 145 | #endif |
| 146 | closedir(dir); |
| 147 | } |
| 148 | |
| 149 | int main(int argc, char *argv[]) { |
| 150 | struct filecount counts; |
| 151 | char *dir; |
| 152 | counts.files = 0; |
| 153 | counts.dirs = 0; |
| 154 | if(argc > 1) |
| 155 | dir = argv[1]; |
| 156 | else |
| 157 | dir = "."; |
| 158 | |
| 159 | #ifdef DEBUG |
| 160 | #if PREFER_STAT |
| 161 | fprintf(stderr, "Compiled with PREFER_STAT. Using lstat()\n"); |
| 162 | #elif defined ( _DIRENT_HAVE_D_TYPE ) |
| 163 | fprintf(stderr, "Using dirent.d_type\n"); |
| 164 | #else |
| 165 | fprintf(stderr, "Don't have dirent.d_type, falling back to using lstat()\n"); |
| 166 | #endif |
| 167 | #endif |
| 168 | |
| 169 | count(dir, &counts); |
| 170 | |
| 171 | /* If we found nothing, this is probably an error which has already been printed */ |
| 172 | if(0 < counts.files || 0 < counts.dirs) { |
| 173 | printf("%s contains %lu files and %lu directories\n", dir, counts.files, counts.dirs); |
| 174 | } |
| 175 | |
| 176 | return 0; |
| 177 | } |
| 178 |


