Link SVN files for a system repository
if you don't want to have the whole /etc under version control
[Intro] [C program]

Intro

Subversion, svn for short, is a handy version control system. Nowadays, Git is more in fashion, especially for team projects. Yet, for single individuals or small groups, Subversion works well and is somewhat simpler. However, it has a couple of drawbacks. One, it creates a .svn subdirectory in every controlled directory. Two, it saves symbolic links as symbolic links, which implies the content is lost unless the linked file is itself under version control in the same repository. So, to back up just selected files from /etc or similar large folders, we want to add hard links to them in a single directory inside our repository.

Subversion is extensible in that it provides for properties; that is, versioned metadata attached to any versioned object. Properties used by svn have names beginning with svn:. Typically, svn automatically sets svn:eol-style and svn:mime-type on each file. Users may maintain svn:ignore on directories, using svn prop* subcommands (mostly propedit, abbreviated pe).

Using a property named lnsvn, we keep track of what each hard link is linked to. By convention, we call the directory containing the links LINKS (all capitals), and keep the base name of each file unaltered. Assuming you already have a versioned directory, to start versioning links to selected files proceed as follows:

here:repository$ svn mkdir LINKS
here:repository$ svn pe lnsvn LINKS
... edit the file (see text below) ...
Set new value for property 'lnsvn' on 'LINKS'
here:repository$ lnsvn
add link to /whatever/you/set in LINKS
...
here:repository$ svn add LINKS/*
A         LINKS/set
...
here:repository$ svn commit

In the property file, type the full path of the file(s) you want to link. Save. Running lnsvn actually creates the link(s). Finally, add the linked file(s) to svn version control. Of course, you should have compiled the program below and placed it somewhere on the PATH with setuid permission before calling it.

You can read the format of the property file in the fgets() loop below: initial spaces are discarded, # comments and empty lines are ignored, relative paths are not permitted, and the keyword COPY can be set before the filename to force actually copying the file instead of hard linking it. That's necessary for files, like /etc/crontab, that don't want to be hard linked. It can also be used for files in different partitions (recall that hard links cannot cross partitions).

The program reads the lnsvn property and checks every link/copy. Just remember to run it before committing.

C program

001: // lnsvn.c written by vesely in milan on Thu 25 Nov 2021
002: 
003: // gcc -g -W -Wall -o ~/bin/lnsvn lnsvn.c
004: // sudo chmod / chown 
005: 
006: /*
007: *
008: * Usage:
009: *
010: *    lnsvn [options] [TARGET...]
011: *
012: * Read the lnsvn property of each TARGET, by default the LINKS 
013: * directory.  The property should contain a list of "original" files, 
014: * given with full path outside the repository. The basename of each 
015: * file becomes a hard link in the current TARGET to the listed original 
016: * file.
017: *
018: * lnsvn checks that the correspondence holds, by verifying the 
019: * inode number of links or the size and date of copies.  If they don't 
020: * match, the target file in the working copy is deleted and relinked to 
021: * or copied from the original file.  Target files should never be 
022: * edited!  Editing the original file can remove the link, as the editor 
023: * may create a new file on editing.
024: *
025: * If the original file doesn't exist, lnsvn signals an error.
026: *
027: * If a file in the target directory has no original file, a warning.
028: */
029: #define _GNU_SOURCE
030: #include <stdio.h>
031: #include <stdlib.h>
032: #include <stdint.h>
033: #include <string.h>
034: #include <ctype.h>
035: #include <sys/types.h>
036: #include <sys/stat.h>
037: #include <sys/wait.h>
038: #include <sys/sendfile.h>
039: #include <unistd.h>
040: #include <dirent.h>
041: #include <fcntl.h>
042: #include <stdarg.h>
043: #include <errno.h>
044: 
045: #include <assert.h>
046: 
047: 
// Name used as the prefix of diagnostic messages; set once in main().
static const char *program_name;

/*
* Return a pointer to the last path component of name, that is the text
* following the last '/'.  If name contains no '/', name itself is
* returned.  The result points inside name; nothing is allocated.
* (Written by hand because basename() differs between GNU and POSIX.)
*/
static inline char *my_basename(char const *name)
{
   char *last_slash = strrchr(name, '/');
   return last_slash != NULL? last_slash + 1: (char*)name;
}
056: 
057: 
058: #if defined __GNUC__
059: __attribute__ ((format(printf, 1, 2)))
060: #endif
061: static void print_err(char const *fmt, ...)
062: {
063:    fprintf(stderr, "%s: ", program_name);
064:    va_list ap;
065:    va_start(ap, fmt);
066:    vfprintf(stderr, fmt, ap);
067:    va_end(ap);
068:    fputc('\n', stderr);
069: }
070: 
/*
* One entry of the lnsvn property: an original file to be linked or
* copied into the target directory.  Entries form a singly linked list.
* The structure is allocated with extra room so that orig can hold the
* whole path.
*/
struct lnsvn
{
   struct lnsvn *next;   // following entry, NULL at the end of the list
   char const *basename; // last path component, points inside orig
   int lineno;           // line of the property where the entry was found
   char copy;            // non-zero: copy the file rather than link it
   char orig[1];         // path of the original file (variable size)
};
typedef struct lnsvn lnsvn;
079: 
// global options

// Verbosity level, incremented by each -v on the command line.
static int verbose;

// Thresholds compared against verbose to decide what gets printed.
typedef enum verbose_what
{
   verbose_none = 0, // print nothing extra
   verbose_file = 1, // report the outcome for each checked file
   verbose_load = 2  // also report each property line as it is loaded
} verbose_what;
088: 
089: 
090: static void free_lnsvn(lnsvn *l)
091: {
092:    while (l)
093:    {
094:       lnsvn *tmp = l->next;
095:       free(l);
096:       l = tmp;
097:    }
098: }
099: 
100: static int load_lsvn(char const *target, lnsvn **base)
101: /*
102: * Load a linked list of lnsvn in basename collating order.
103: * Return -1 on hard error, 1 on soft error, 0 on OK.
104: */
105: {
106:    int rtc = 0;
107:    assert(base);
108: 
109:    *base = NULL;
110: 
111:    int pipefd[2];
112:    if (pipe(pipefd) != 0)
113:    {
114:       print_err("pipe failure: %s", strerror(errno));
115:       return -1;
116:    }
117: 
118:    pid_t pid = fork();
119:    if (pid == -1)
120:    {
121:       print_err("fork failure: %s", strerror(errno));
122:       close(pipefd[0]);
123:       close(pipefd[1]);
124:       return -1;
125:    }
126: 
127:    if (pid == 0) // child, get property on stdout
128:    {
129:       close(pipefd[0]); // unused read end
130:       close(STDOUT_FILENO);
131:       errno = 0;
132:       if (dup(pipefd[1]) != STDOUT_FILENO)
133:       {
134:          print_err("dup failure: %s", strerror(errno));
135:          close(pipefd[1]);
136:          _exit(1);
137:       }
138: 
139:       uid_t ruid, euid, suid; // drop privileges is setuid
140:       if (getresuid(&ruid, &euid, &suid) != 0 ||
141:          setresuid(suid, suid, suid) != 0)
142:       {
143:          print_err("get/set uid failure: %s", strerror(errno));
144:          close(pipefd[1]);
145:          _exit(1);
146:       }
147: 
148:       execlp("svn", "svn", "propget", "lnsvn", NULL);
149:       print_err("execlp failure: %s", strerror(errno));
150:       _exit(1);
151:    }
152: 
153:    // parent, read the property and build the list
154:    close(pipefd[1]); // unused write end
155:    FILE *filepipe = fdopen(pipefd[0], "r");
156:    if (filepipe == NULL)
157:    {
158:       print_err("fdopen failure: %s", strerror(errno));
159:       close(pipefd[0]);
160:       rtc = -1;
161:    }
162:    else
163:    {
164:       int lineno = 0;
165:       char *s, buf[PATH_MAX];
166:       while ((s = fgets(buf, sizeof buf, filepipe)) != NULL)
167:       {
168:          ++lineno;
169:          char *eol = strchr(s, '\n');
170:          int ch;
171:          if (eol == NULL)
172:          {
173:             rtc = 1;
174:             print_err("line %d of property too long, max=%zu",
175:                lineno, sizeof(buf));
176:             while ((ch = fgetc(filepipe)) != EOF)
177:                if (ch == '\n')
178:                   break;
179:          }
180:          else
181:          {
182:             // left trim, discard comments
183:             while (isspace(ch = *(unsigned char*)s) && ch != 0)
184:                ++s;
185:             if (ch == '#')
186:                continue;
187: 
188:             // right trim, discard empty lines
189:             --eol;
190:             while (s < eol && isspace(*(unsigned char*)eol))
191:                --eol;
192:             if (s >= eol)
193:                continue;
194: 
195:             *++eol = 0;
196: 
197:             // check special directive
198:             int copy = 0;
199:             if (*s != '/')
200:             {
201:                char *d = s;
202:                while (s < eol && !isspace(*(unsigned char*)s))
203:                   ++s;
204:                if (s >= eol)
205:                {
206:                   print_err(
207:                      "line %d: relative file or bare directive: %s",
208:                      lineno, d);
209:                   continue;
210:                }
211: 
212:                // left trim filename
213:                *s++ = 0;
214:                while (s < eol && isspace(*(unsigned char*)s))
215:                   ++s;
216:                assert(s < eol);
217: 
218:                if (strcasecmp(d, "copy") == 0)
219:                   copy = 1;
220:                else
221:                {
222:                   print_err(
223:                      "line %d: unrecognized directive %s for %s",
224:                      lineno, d, s);
225:                   continue;
226:                }
227:             }
228: 
229:             unsigned size = eol - s + sizeof(lnsvn);
230:             lnsvn *l = malloc(size);
231:             if (l == NULL)
232:             {
233:                print_err("MEMORY FAILURE");
234:                free_lnsvn(*base);
235:                *base = NULL;
236:                rtc = -1;
237:                break;
238:             }
239: 
240:             memset(l, 0, sizeof *l);
241:             strcpy(l->orig, s);
242:             l->basename = my_basename(l->orig);
243:             l->lineno = lineno;
244:             l->copy = copy;
245:             if (verbose >= verbose_load)
246:                printf("%4d %s%s\n", lineno,
247:                   copy? "COPY ": "", l->basename);
248: 
249:             lnsvn **pl;
250:             for (pl = base; *pl; pl = &(*pl)->next)
251:             {
252:                int cmp = strcmp(l->basename, (*pl)->basename);
253:                if (cmp < 0)
254:                   break;
255: 
256:                if (cmp == 0)
257:                {
258:                   print_err("dup name %s at lines %d and %d in %s",
259:                      l->basename, (*pl)->lineno, l->lineno, target);
260:                   free(l);
261:                   rtc = 1;
262:                }
263:             }
264: 
265:             l->next = *pl;
266:             *pl = l;
267:          }
268:       }
269: 
270:       int save_errno = 0;
271:       if (ferror(filepipe))
272:          save_errno = errno;
273:       if (fclose(filepipe) && save_errno == 0)
274:          save_errno = errno;
275:       if (save_errno)
276:       {
277:          rtc = 1;
278:          print_err("error reading from pipe: %s", strerror(save_errno));
279:       }
280:    }
281: 
282:    int wstatus;
283:    pid_t wpid = waitpid(pid, &wstatus, 0);
284:    if (wpid != pid)
285:    {
286:       print_err("waitpid failure: %s", strerror(errno));
287:    }
288:    else if (WIFEXITED(wstatus))
289:    {
290:       if (WEXITSTATUS(wstatus) != 0)
291:          rtc |= 1;
292:    }
293:    else if (WIFSIGNALED(wstatus))
294:    {
295:       int sig = WTERMSIG(wstatus);
296:       print_err("reading property interrupted by %s (%d)%s",
297:          strsignal(sig), sig, WCOREDUMP(wstatus)? ", core dumped": "");
298:       rtc = -1;
299:    }
300: 
301:    return rtc;
302: }
303: 
304: lnsvn **find_lnsvn(lnsvn **base, char const *name)
305: {
306:    assert(base);
307:    assert(name);
308: 
309:    lnsvn **p = base, *l;
310:    while ((l = *p) != NULL)
311:    {
312:       int cmp = strcmp(l->basename, name);
313:       if (cmp < 0)
314:       {
315:          p = &(*p)->next;
316:          continue;
317:       }
318: 
319:       return cmp > 0? NULL: p;
320:    }
321: 
322:    return NULL;
323: }
324: 
325: static int copyfile(lnsvn *l, struct stat *st_orig)
326: /*
327: * Assume the current directory is target.  Copy the original file
328: * (instead of linking it).
329: *
330: * Return 0 if ok
331: */
332: {
333:    int read_fd = open(l->orig, O_RDONLY);
334:    if (read_fd < 0)
335:    {
336:       print_err("Cannot read %s: %s", l->orig, strerror(errno));
337:       return -1;
338:    }
339: 
340:    int write_fd = open(l->basename, O_WRONLY| O_CREAT,
341:       st_orig->st_mode & ~S_IFMT);
342:    if (write_fd < 0)
343:    {
344:       print_err("Cannot write %s: %s", l->basename, strerror(errno));
345:       close(read_fd);
346:       return -1;
347:    }
348: 
349:    off_t size = st_orig->st_size;
350:    do
351:    {
352:       off_t out = sendfile(write_fd, read_fd, NULL, size);
353:       if (out <= 0)
354:          break;
355: 
356:       size -= out;
357:    } while (size > 0);
358: 
359:    int rtc = 0;
360:    if (size != 0)
361:    {
362:       print_err("Cannot copy %s: %s", l->orig, strerror(errno));
363:       rtc = 1;
364:    }
365: 
366:    const struct timespec times[2] =
367:    {{0, UTIME_OMIT},
368:    {st_orig->st_mtim.tv_sec, st_orig->st_mtim.tv_nsec}};
369: 
370:    if (futimens(write_fd, times) != 0)
371:    {
372:       print_err("Cannot set time on %s: %s",
373:          l->basename, strerror(errno));
374:       rtc = 1;
375:    }
376: 
377:    close(read_fd);
378:    close(write_fd);
379: 
380:    return rtc;
381: } 
382: 
383: static int run_target(char const *target)
384: /*
385: * Assume the current directory is target.  Obtain the linked list of
386: * original files, then read the target and check the corresponding file
387: * for each regular file.
388: */
389: {
390:    lnsvn *base;
391:    int rtc = load_lsvn(target, &base);
392: 
393:    if (rtc >= 0)
394:    {
395:       DIR *dirp = opendir(".");
396:       if (dirp == NULL)
397:       {
398:          print_err("Cannot read %s: %s", target, strerror(errno));
399:          free_lnsvn(base);
400:          return -1;
401:       }
402: 
403:       struct dirent *dir;
404:       while ((dir = readdir(dirp)) != NULL)
405:       {
406:          struct stat st, st_orig;
407:          if (stat(dir->d_name, &st) != 0)
408:          {
409:             print_err("stat failure for %s/%s: %s",
410:                target, dir->d_name, strerror(errno));
411:             rtc = 1;
412:             continue;
413:          }
414: 
415:          if (!S_ISREG(st.st_mode))
416:             continue;
417: 
418:          lnsvn *l, **pl = find_lnsvn(&base, dir->d_name);
419:          if (pl == NULL)
420:          {
421:             print_err("%s/%s doesn't appear in property",
422:                target, dir->d_name);
423:             continue;
424:          }
425: 
426:          l = *pl;
427:          assert(strcmp(l->basename, dir->d_name) == 0);
428: 
429:          if (stat(l->orig, &st_orig) != 0)
430:          {
431:             print_err("cannot stat %s at line %d of %s",
432:                l->orig, l->lineno, target);
433:             *pl = l->next; // remove from list
434:             free(l);
435:             rtc = 1;
436:             continue;
437:          }
438: 
439:          if (st.st_ino == st_orig.st_ino)
440:          {
441:             if (verbose >= verbose_file)
442:                printf("%4d %s%s: same ino (%jd)\n", l->lineno,
443:                   l->copy? "COPY ": "", l->basename,
444:                   (intmax_t)st.st_ino);
445:             *pl = l->next;
446:             free(l);
447:             continue; // good
448:          }
449: 
450:          if (!S_ISREG(st_orig.st_mode))
451:          {
452:             print_err("%s is not a regular file, at line %d of %s",
453:                l->orig, l->lineno, target);
454:             rtc = 1;
455:             *pl = l->next;
456:             free(l);
457:             continue;
458:          }
459: 
460:          if (l->copy)
461:          {
462:             if (st_orig.st_mtim.tv_sec != st.st_mtim.tv_sec ||
463:                st_orig.st_mtim.tv_nsec != st.st_mtim.tv_nsec ||
464:                st_orig.st_size != st.st_size)
465:             {
466:                rtc = copyfile(l, &st_orig);
467:                if (verbose >= verbose_file && rtc == 0)
468:                   printf("%4d COPY %s: copied\n",
469:                   l->lineno, l->basename);
470:             }
471:             else if (verbose >= verbose_file)
472:                printf("%4d COPY %s: same time/size (%jd.%ld/%jd)\n",
473:                   l->lineno, l->basename,
474:                   (intmax_t)st.st_mtim.tv_sec,
475:                   st.st_mtim.tv_nsec, (intmax_t)st.st_size);
476:          }
477:          else if (unlink(dir->d_name) || link(l->orig, dir->d_name))
478:          {
479:             print_err("cannot re-link %s/%s to %s: %s",
480:                target, dir->d_name, l->orig, strerror(errno));
481:             rtc = 1;
482:          }
483:          else if (verbose >= verbose_file)
484:             printf("%4d %s: relinked\n", l->lineno, l->basename);
485:          *pl = l->next;
486:          free(l);
487:       }
488:       closedir(dirp);
489: 
490:       // Now for the remaining items
491:       for (lnsvn *l = base; l; l = l->next)
492:       {
493:          if (l->copy)
494:          {
495:             struct stat st;
496:             int st_rc = stat(l->orig, &st);
497:             if (st_rc == 0 && copyfile(l, &st) == 0)
498:                printf("copied %s to %s\n", l->orig, target);
499:             else if (st_rc != 0)
500:             {
501:                print_err("cannot copy %s to %s/%s: %s",
502:                   l->orig, target, l->basename, strerror(errno));
503:                rtc = 1;
504:             }
505:          }
506:          else if (link(l->orig, l->basename) == 0)
507:             printf("add link to %s in %s\n", l->orig, target);
508:          else
509:          {
510:             print_err("cannot link %s to %s/%s: %s",
511:                l->orig, target, l->basename, strerror(errno));
512:             rtc = 1;
513:          }
514:       }
515:    }
516: 
517:    free_lnsvn(base);
518:    return rtc;
519: }
520: 
521: static int run_target_cd(char const *target)
522: /*
523: * Change directory and call run_target.
524: */
525: {
526:    if (target == NULL || *target == 0)
527:       return 1;
528: 
529:    struct stat st;
530:    int rtc = stat(target, &st);
531:    if (rtc != 0 || !S_ISDIR(st.st_mode))
532:    {
533:       print_err("%s %s", target, rtc?
534:          strerror(errno): "is not a directory");
535:       return -1;
536:    }
537: 
538:    char cwd[PATH_MAX];
539:    if (getcwd(cwd, sizeof cwd) == NULL)
540:    {
541:       print_err("getcwd failure: %s", strerror(errno));
542:       return -1;
543:    }
544: 
545:    if (chdir(target) != 0)
546:    {
547:       print_err("cannot chdir to %s: %s", target, strerror(errno));
548:       return -1;
549:    }
550: 
551:    rtc = run_target(target);
552: 
553:    chdir(cwd);
554:    return rtc;
555: }
556: 
557: int main(int argc, char *argv[])
558: {
559:    program_name = my_basename(argv[0]);
560: 
561:    int i, opt = 1, target = 0, errs = 0;
562:    for (i = 1; i < argc; ++i)
563:    {
564:       char *a = argv[i];
565:       if (a[0]== '-' && opt)
566:       {
567:          int ch;
568:          while ((ch = *(unsigned char*)++a) != 0)
569:          {
570:             switch (ch)
571:             {
572:                case '-': // end of options
573:                   opt = 0;
574:                   break;
575: 
576:                case 'v':
577:                   ++verbose;
578:                   break;
579: 
580:                default:
581:                   fprintf(stderr, "invalid option %c in %s\n",
582:                      ch, argv[i]);
583:                   ++errs;
584:                   break;
585:             }
586:          }
587:       }
588:       else
589:       {
590:          ++target;
591:          unsigned l = strlen(a);
592:          if (l > 0)
593:          {
594:             --l;
595:             if (l > 0 && a[l] == '/')  // remove trailing slash except root
596:                a[l] = 0;
597:             int rtc = run_target_cd(a);
598:             if (rtc)
599:             {
600:                ++errs;
601:                if (rtc < 0)
602:                   break;
603:             }
604:          }
605:       }
606:    }
607: 
608:    /*
609:    * Take LINKS as the default target.
610:    */
611:    if (errs == 0 && target == 0)
612:       errs += run_target_cd("LINKS");
613: 
614:    return errs != 0;
615: }
616: 
zero rights

The only option the program takes is -v for verbosity. It can be set multiple times.