Jay Taylor's notes

back to listing index

ChristopherSchultz/fast-file-count

[web search]
Original source (github.com)
Tags: linux tools github.com
Clipped on: 2019-06-10

Skip to content
Branch: master

fast-file-count / dircnt.c

Find file Copy path
Image (Asset 3/4) alt= Open this file in GitHub Desktop
1 /**
2 * dircnt.c - a fast file-counting program.
3 *
4 * Written 2015-02-06 by Christopher Schultz as a programming demonstration
5 * for a StackOverflow answer:
6 * https://stackoverflow.com/questions/1427032/fast-linux-file-count-for-a-large-number-of-files/28368788#28368788
7 *
8 * This code is licensed under the Apache License 2.0. Please read the file
9 * LICENSE for more information.
10 *
11 * Please see the README.md file for compilation and usage instructions.
12 *
13 * Thanks to FlyingCodeMonkey, Gary R. Van Sickle, and Jonathan Leffler for
14 * various suggestions and improvements to the original code. Any additional
15 * contributors can be found by looking at the GitHub revision history from
16 * this point forward.
17 */
18 #include <stdio.h>
19 #include <dirent.h>
20 #include <string.h>
21 #include <stdlib.h>
22 #include <limits.h>
23 #include <sys/stat.h>
24
/*
 * PATH_SEPARATOR is the single character placed between a directory path
 * and an entry name when a sub-path is built: a backslash when either
 * WIN32 or _WIN32 is defined, a forward slash otherwise.
 */
#if !defined(WIN32) && !defined(_WIN32)
#define PATH_SEPARATOR '/'
#else
#define PATH_SEPARATOR '\\'
#endif
30
/* Process exit status used when a counter would exceed ULONG_MAX. */
#define EXIT_REACHED_LIMIT 1
32
/*
 * Running totals gathered while walking a directory tree. Directories and
 * non-directories are tracked in separate counters.
 */
struct filecount {
    unsigned long dirs;  /* number of directories seen */
    unsigned long files; /* number of entries seen that are not directories */
};
38
39 /*
40 * counts the number of files and directories in the specified directory.
41 *
42 * path - relative pathname of a directory whose files should be counted
43 * counts - pointer to struct containing file/dir counts
44 */
45 void count(char *path, struct filecount *counts) {
46 DIR *dir; /* dir structure we are reading */
47 struct dirent *ent; /* directory entry currently being processed */
48 char subpath[PATH_MAX]; /* buffer for building complete subdir and file names */
49 struct stat statbuf; /* buffer for stat() info. A call to lstat() might be
50 required even if _DIRENT_HAVE_D_TYPE is true
51 because ent->d_type might be DT_UNKNOWN */
52 int isdir; /* flag for a directory entry being a directory */
53
54 #ifdef DEBUG
55 fprintf(stderr, "Opening dir %s\n", path);
56 #endif
57 dir = opendir(path);
58
59 /* opendir failed... file likely doesn't exist or isn't a directory */
60 if(NULL == dir) {
61 perror(path);
62 return;
63 }
64
65 while((ent = readdir(dir))) {
66 if (strlen(path) + 1 + strlen(ent->d_name) > PATH_MAX) {
67 fprintf(stdout, "path too long (%ld) %s%c%s", (strlen(path) + 1 + strlen(ent->d_name)), path, PATH_SEPARATOR, ent->d_name);
68 return;
69 }
70
71 isdir = 0; /* reset isdir flag */
72 #ifdef DEBUG
73 fprintf(stderr, "Considering %s%c%s\n", path, PATH_SEPARATOR, ent->d_name);
74 #endif /* DEBUG */
75
76 /* Use dirent.d_type if present, otherwise use stat() */
77 #if ( defined ( _DIRENT_HAVE_D_TYPE ) && !PREFER_STAT)
78 if(DT_UNKNOWN == ent->d_type) {
79 /* Must perform lstat() anyway */
80 #ifdef DEBUG
81 fprintf(stderr, "Dirent type is DT_UNKNOWN, must perform lstat()\n");
82 #endif /* DEBUG */
83 sprintf(subpath, "%s%c%s", path, PATH_SEPARATOR, ent->d_name);
84 if(lstat(subpath, &statbuf)) {
85 perror(subpath);
86 return;
87 }
88 if(S_ISDIR(statbuf.st_mode)) {
89 #ifdef DEBUG
90 fprintf(stderr, "Determined %s is a directory via lstat(1)\n", subpath);
91 #endif /* DEBUG */
92 isdir = 1;
93 }
94 } else if(DT_DIR == ent->d_type) {
95 #ifdef DEBUG
96 fprintf(stderr, "Determined %s%c%s is a directory via dirent\n", path, PATH_SEPARATOR, ent->d_name);
97 #endif /* DEBUG */
98 isdir = 1;
99 }
100 #else
101 sprintf(subpath, "%s%c%s", path, PATH_SEPARATOR, ent->d_name);
102 if(lstat(subpath, &statbuf)) {
103 perror(subpath);
104 return;
105 }
106
107 if(S_ISDIR(statbuf.st_mode)) {
108 #ifdef DEBUG
109 fprintf(stderr, "S_ISDIR=%d, mode bits=%x\n", S_ISDIR(statbuf.st_mode), statbuf.st_mode);
110 fprintf(stderr, "Determined %s is a directory via lstat(2)\n", subpath);
111 #endif /* DEBUG */
112 isdir = 1;
113 }
114 #endif /* if defined _DIRENT_HAVE_D_TYPE, etc. */
115
116 #ifdef DEBUG
117 fprintf(stderr, "name=%s, isdir=%d\n", ent->d_name, isdir);
118 #endif
119
120 if(isdir) {
121 /* Skip "." and ".." directory entries... they are not "real" directories */
122 if(0 == strcmp("..", ent->d_name) || 0 == strcmp(".", ent->d_name)) {
123 /* fprintf(stderr, "This is %s, skipping\n", ent->d_name); */
124 } else {
125 if(ULONG_MAX == counts->dirs) {
126 fprintf(stderr, "Reached maximum number of directories to count (%lu) after %lu files\n", counts->dirs, counts->files);
127 exit(EXIT_REACHED_LIMIT);
128 }
129 sprintf(subpath, "%s%c%s", path, PATH_SEPARATOR, ent->d_name);
130 counts->dirs++;
131 count(subpath, counts);
132 }
133 } else {
134 if(ULONG_MAX == counts->files) {
135 fprintf(stderr, "Reached maximum number of files to count (%lu) after %lu directories\n", counts->files, counts->dirs);
136 exit(EXIT_REACHED_LIMIT);
137 }
138
139 counts->files++;
140 }
141 }
142
143 #ifdef DEBUG
144 fprintf(stderr, "Closing dir %s\n", path);
145 #endif
146 closedir(dir);
147 }
148
149 int main(int argc, char *argv[]) {
150 struct filecount counts;
151 char *dir;
152 counts.files = 0;
153 counts.dirs = 0;
154 if(argc > 1)
155 dir = argv[1];
156 else
157 dir = ".";
158
159 #ifdef DEBUG
160 #if PREFER_STAT
161 fprintf(stderr, "Compiled with PREFER_STAT. Using lstat()\n");
162 #elif defined ( _DIRENT_HAVE_D_TYPE )
163 fprintf(stderr, "Using dirent.d_type\n");
164 #else
165 fprintf(stderr, "Don't have dirent.d_type, falling back to using lstat()\n");
166 #endif
167 #endif
168
169 count(dir, &counts);
170
171 /* If we found nothing, this is probably an error which has already been printed */
172 if(0 < counts.files || 0 < counts.dirs) {
173 printf("%s contains %lu files and %lu directories\n", dir, counts.files, counts.dirs);
174 }
175
176 return 0;
177 }
178