/*
 * Jay Taylor's notes
 * back to listing index
 * ChristopherSchultz/fast-file-count
 * [web search]
 * Original source (github.com)
 * Clipped on: 2019-06-10
 * Skip to content
 */
/**
 * dircnt.c - a fast file-counting program.
 *
 * Written 2015-02-06 by Christopher Schultz as a programming demonstration
 * for a StackOverflow answer:
 * https://stackoverflow.com/questions/1427032/fast-linux-file-count-for-a-large-number-of-files/28368788#28368788
 *
 * This code is licensed under the Apache License 2.0. Please read the file
 * LICENSE for more information.
 *
 * Please see the README.md file for compilation and usage instructions.
 *
 * Thanks to FlyingCodeMonkey, Gary R. Van Sickle, and Jonathan Leffler for
 * various suggestions and improvements to the original code. Any additional
 * contributors can be found by looking at the GitHub revision history from
 * this point forward.
 */
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <dirent.h>
#include <sys/stat.h>

/* Character placed between a directory path and an entry name when the two
   are joined into a single pathname. */
#if defined(WIN32) || defined(_WIN32)
#define PATH_SEPARATOR '\\'
#else
#define PATH_SEPARATOR '/'
#endif

/* Process exit status used when one of the counters has reached ULONG_MAX
   and cannot be incremented any further. */
#define EXIT_REACHED_LIMIT 0x01

/*
 * Running totals gathered while walking a directory tree. Directories and
 * everything else are tallied separately.
 */
struct filecount {
  unsigned long dirs;  /* directories seen, not counting "." and ".." */
  unsigned long files; /* entries seen that are not directories */
};

39 | /* |
40 | * counts the number of files and directories in the specified directory. |
41 | * |
42 | * path - relative pathname of a directory whose files should be counted |
43 | * counts - pointer to struct containing file/dir counts |
44 | */ |
45 | void count(char *path, struct filecount *counts) { |
46 | DIR *dir; /* dir structure we are reading */ |
47 | struct dirent *ent; /* directory entry currently being processed */ |
48 | char subpath[PATH_MAX]; /* buffer for building complete subdir and file names */ |
49 | struct stat statbuf; /* buffer for stat() info. A call to lstat() might be |
50 | required even if _DIRENT_HAVE_D_TYPE is true |
51 | because ent->d_type might be DT_UNKNOWN */ |
52 | int isdir; /* flag for a directory entry being a directory */ |
53 | |
54 | #ifdef DEBUG |
55 | fprintf(stderr, "Opening dir %s\n", path); |
56 | #endif |
57 | dir = opendir(path); |
58 | |
59 | /* opendir failed... file likely doesn't exist or isn't a directory */ |
60 | if(NULL == dir) { |
61 | perror(path); |
62 | return; |
63 | } |
64 | |
65 | while((ent = readdir(dir))) { |
66 | if (strlen(path) + 1 + strlen(ent->d_name) > PATH_MAX) { |
67 | fprintf(stdout, "path too long (%ld) %s%c%s", (strlen(path) + 1 + strlen(ent->d_name)), path, PATH_SEPARATOR, ent->d_name); |
68 | return; |
69 | } |
70 | |
71 | isdir = 0; /* reset isdir flag */ |
72 | #ifdef DEBUG |
73 | fprintf(stderr, "Considering %s%c%s\n", path, PATH_SEPARATOR, ent->d_name); |
74 | #endif /* DEBUG */ |
75 | |
76 | /* Use dirent.d_type if present, otherwise use stat() */ |
77 | #if ( defined ( _DIRENT_HAVE_D_TYPE ) && !PREFER_STAT) |
78 | if(DT_UNKNOWN == ent->d_type) { |
79 | /* Must perform lstat() anyway */ |
80 | #ifdef DEBUG |
81 | fprintf(stderr, "Dirent type is DT_UNKNOWN, must perform lstat()\n"); |
82 | #endif /* DEBUG */ |
83 | sprintf(subpath, "%s%c%s", path, PATH_SEPARATOR, ent->d_name); |
84 | if(lstat(subpath, &statbuf)) { |
85 | perror(subpath); |
86 | return; |
87 | } |
88 | if(S_ISDIR(statbuf.st_mode)) { |
89 | #ifdef DEBUG |
90 | fprintf(stderr, "Determined %s is a directory via lstat(1)\n", subpath); |
91 | #endif /* DEBUG */ |
92 | isdir = 1; |
93 | } |
94 | } else if(DT_DIR == ent->d_type) { |
95 | #ifdef DEBUG |
96 | fprintf(stderr, "Determined %s%c%s is a directory via dirent\n", path, PATH_SEPARATOR, ent->d_name); |
97 | #endif /* DEBUG */ |
98 | isdir = 1; |
99 | } |
100 | #else |
101 | sprintf(subpath, "%s%c%s", path, PATH_SEPARATOR, ent->d_name); |
102 | if(lstat(subpath, &statbuf)) { |
103 | perror(subpath); |
104 | return; |
105 | } |
106 | |
107 | if(S_ISDIR(statbuf.st_mode)) { |
108 | #ifdef DEBUG |
109 | fprintf(stderr, "S_ISDIR=%d, mode bits=%x\n", S_ISDIR(statbuf.st_mode), statbuf.st_mode); |
110 | fprintf(stderr, "Determined %s is a directory via lstat(2)\n", subpath); |
111 | #endif /* DEBUG */ |
112 | isdir = 1; |
113 | } |
114 | #endif /* if defined _DIRENT_HAVE_D_TYPE, etc. */ |
115 | |
116 | #ifdef DEBUG |
117 | fprintf(stderr, "name=%s, isdir=%d\n", ent->d_name, isdir); |
118 | #endif |
119 | |
120 | if(isdir) { |
121 | /* Skip "." and ".." directory entries... they are not "real" directories */ |
122 | if(0 == strcmp("..", ent->d_name) || 0 == strcmp(".", ent->d_name)) { |
123 | /* fprintf(stderr, "This is %s, skipping\n", ent->d_name); */ |
124 | } else { |
125 | if(ULONG_MAX == counts->dirs) { |
126 | fprintf(stderr, "Reached maximum number of directories to count (%lu) after %lu files\n", counts->dirs, counts->files); |
127 | exit(EXIT_REACHED_LIMIT); |
128 | } |
129 | sprintf(subpath, "%s%c%s", path, PATH_SEPARATOR, ent->d_name); |
130 | counts->dirs++; |
131 | count(subpath, counts); |
132 | } |
133 | } else { |
134 | if(ULONG_MAX == counts->files) { |
135 | fprintf(stderr, "Reached maximum number of files to count (%lu) after %lu directories\n", counts->files, counts->dirs); |
136 | exit(EXIT_REACHED_LIMIT); |
137 | } |
138 | |
139 | counts->files++; |
140 | } |
141 | } |
142 | |
143 | #ifdef DEBUG |
144 | fprintf(stderr, "Closing dir %s\n", path); |
145 | #endif |
146 | closedir(dir); |
147 | } |
148 | |
149 | int main(int argc, char *argv[]) { |
150 | struct filecount counts; |
151 | char *dir; |
152 | counts.files = 0; |
153 | counts.dirs = 0; |
154 | if(argc > 1) |
155 | dir = argv[1]; |
156 | else |
157 | dir = "."; |
158 | |
159 | #ifdef DEBUG |
160 | #if PREFER_STAT |
161 | fprintf(stderr, "Compiled with PREFER_STAT. Using lstat()\n"); |
162 | #elif defined ( _DIRENT_HAVE_D_TYPE ) |
163 | fprintf(stderr, "Using dirent.d_type\n"); |
164 | #else |
165 | fprintf(stderr, "Don't have dirent.d_type, falling back to using lstat()\n"); |
166 | #endif |
167 | #endif |
168 | |
169 | count(dir, &counts); |
170 | |
171 | /* If we found nothing, this is probably an error which has already been printed */ |
172 | if(0 < counts.files || 0 < counts.dirs) { |
173 | printf("%s contains %lu files and %lu directories\n", dir, counts.files, counts.dirs); |
174 | } |
175 | |
176 | return 0; |
177 | } |
178 |