Simulator
This section introduces fsck.py
, a simple simulator you can use to better understand how file system corruptions can be detected (and potentially repaired). Please see the previous lesson for details on how to run the simulator.
#! /usr/bin/env python from __future__ import print_function import random import string from optparse import OptionParser DEBUG = False def dprint(str): if DEBUG: print(str) printOps = True printState = True printFinal = True class bitmap: def __init__(self, size): self.size = size self.bmap = [] for num in range(size): self.bmap.append(0) self.numAllocated = 0 def corrupt(self, which): assert(which < self.size and which >= 0) # invert map self.bmap[which] = 1 - self.bmap[which] return def alloc(self): if self.numAllocated == self.size: return -1 while True: num = random.randint(0, self.size-1) if self.bmap[num] == 0: self.bmap[num] = 1 self.numAllocated += 1 return num return def findFree(self): if self.numAllocated == self.size: return -1 while True: num = random.randint(0, self.size-1) if self.bmap[num] == 0: return num return def free(self, num): assert(self.bmap[num] == 1) self.numAllocated -= 1 self.bmap[num] = 0 def markAllocated(self, num): assert(self.bmap[num] == 0) self.numAllocated += 1 self.bmap[num] = 1 def numFree(self): return self.size - self.numAllocated def dump(self): s = '' for i in range(len(self.bmap)): s += str(self.bmap[i]) return s class block: def __init__(self, ftype): assert(ftype == 'd' or ftype == 'f' or ftype == 'free') self.ftype = ftype # only for directories, properly a subclass but who cares self.dirUsed = 0 self.maxUsed = 32 self.dirList = [] self.data = '' def dump(self): if self.ftype == 'free': return '[]' elif self.ftype == 'd': rc = '' for d in self.dirList: # d is of the form ('name', inum) short = '(%s,%s)' % (d[0], d[1]) if rc == '': rc = short else: rc += ' ' + short return '['+rc+']' # return '%s' % self.dirList else: return '[%s]' % self.data def setType(self, ftype): assert(self.ftype == 'free') self.ftype = ftype def getType(self): return self.ftype def addData(self, data): assert(self.ftype == 'f') self.data = data def getNumEntries(self): assert(self.ftype == 'd') return self.dirUsed def getFreeEntries(self): assert(self.ftype == 'd') return self.maxUsed - self.dirUsed def getEntry(self, num): assert(self.ftype == 'd') assert(num < self.dirUsed) return self.dirList[num] def addDirEntry(self, name, inum): assert(self.ftype == 'd') self.dirList.append((name, inum)) self.dirUsed += 1 assert(self.dirUsed <= self.maxUsed) def delDirEntry(self, name): assert(self.ftype == 'd') tname = name.split('/') dname = tname[len(tname) - 1] for i in range(len(self.dirList)): if self.dirList[i][0] == dname: self.dirList.pop(i) self.dirUsed -= 1 return assert(1 == 0) def dirEntryExists(self, name): assert(self.ftype == 'd') for d in self.dirList: if name == d[0]: return True return False def getDirEntries(self): return self.dirList def setDirEntry(self, index, contents): self.dirList[index] = contents return def free(self): assert(self.ftype != 'free') if self.ftype == 'd': # check for only dot, dotdot here assert(self.dirUsed == 2) self.dirUsed = 0 self.data = '' self.ftype = 'free' class inode: def __init__(self, ftype='free', addr=-1, refCnt=1): self.setAll(ftype, addr, refCnt) def setAll(self, ftype, addr, refCnt): assert(ftype == 'd' or ftype == 'f' or ftype == 'free') self.ftype = ftype self.addr = addr self.refCnt = refCnt def incRefCnt(self): self.refCnt += 1 def decRefCnt(self): self.refCnt -= 1 def getRefCnt(self): return self.refCnt def setType(self, ftype): assert(ftype == 'd' or ftype == 'f' or ftype == 'free') self.ftype = ftype def setAddr(self, block): self.addr = block def getSize(self): if self.addr == -1: return 0 else: return 1 def getAddr(self): return self.addr def getType(self): return self.ftype def free(self): self.ftype = 'free' self.addr = -1 class fs: def __init__(self, numInodes, numData, seedCorrupt, solve): self.numInodes = numInodes self.numData = numData self.seedCorrupt = seedCorrupt self.solve = solve self.ibitmap = bitmap(self.numInodes) self.inodes = [] for i in range(self.numInodes): self.inodes.append(inode()) self.dbitmap = bitmap(self.numData) self.data = [] for i in range(self.numData): self.data.append(block('free')) # root inode self.ROOT = 0 # create root directory self.ibitmap.markAllocated(self.ROOT) self.inodes[self.ROOT].setAll('d', 0, 2) self.dbitmap.markAllocated(self.ROOT) self.data[0].setType('d') self.data[0].addDirEntry('.', self.ROOT) self.data[0].addDirEntry('..', self.ROOT) # these is just for the fake workload generator self.files = [] self.dirs = ['/'] self.nameToInum = {'/':self.ROOT} def dump(self): print('inode bitmap', self.ibitmap.dump()) print('inodes ', end='') for i in range(0,self.numInodes): ftype = self.inodes[i].getType() if ftype == 'free': print('[]', end=' ') else: print('[%s a:%s r:%d]' % (ftype, self.inodes[i].getAddr(), self.inodes[i].getRefCnt()), end=' ') print('') print('data bitmap ', self.dbitmap.dump()) print('data ', end='') for i in range(self.numData): print(self.data[i].dump(), end=' ') print('') return def makeName(self): return random.choice(list(string.ascii_lowercase)) def inodeAlloc(self): return self.ibitmap.alloc() def inodeFree(self, inum): self.ibitmap.free(inum) self.inodes[inum].free() def dataAlloc(self): return self.dbitmap.alloc() def dataFree(self, bnum): self.dbitmap.free(bnum) self.data[bnum].free() def getParent(self, name): tmp = name.split('/') if len(tmp) == 2: return '/' pname = '' for i in range(1, len(tmp)-1): pname = pname + '/' + tmp[i] return pname def deleteFile(self, tfile): if printOps: print('unlink("%s");' % tfile) inum = self.nameToInum[tfile] ftype = self.inodes[inum].getType() if self.inodes[inum].getRefCnt() == 1: # free data blocks first dblock = self.inodes[inum].getAddr() if dblock != -1: self.dataFree(dblock) # then free inode self.inodeFree(inum) else: self.inodes[inum].decRefCnt() # remove from parent directory parent = self.getParent(tfile) # print '--> delete from parent', parent pinum = self.nameToInum[parent] # print '--> delete from parent inum', pinum pblock = self.inodes[pinum].getAddr() # FIXED BUG: DECREASE PARENT INODE REF COUNT if needed (thanks to Srinivasan Thirunarayanan) if ftype == 'd': self.inodes[pinum].decRefCnt() # print '--> delete from parent addr', pblock self.data[pblock].delDirEntry(tfile) # finally, remove from files list self.files.remove(tfile) return 0 def createLink(self, target, newfile, parent): # find info about parent parentInum = self.nameToInum[parent] # is there room in the parent directory? pblock = self.inodes[parentInum].getAddr() if self.data[pblock].getFreeEntries() <= 0: dprint('*** createLink failed: no room in parent directory ***') return -1 # print 'is %s in directory %d' % (newfile, pblock) if self.data[pblock].dirEntryExists(newfile): dprint('*** createLink failed: not a unique name ***') return -1 # now, find inumber of target tinum = self.nameToInum[target] self.inodes[tinum].incRefCnt() # DONT inc parent ref count - only for directories # self.inodes[parentInum].incRefCnt() # now add to directory tmp = newfile.split('/') ename = tmp[len(tmp)-1] self.data[pblock].addDirEntry(ename, tinum) return tinum def createFile(self, parent, newfile, ftype): # find info about parent parentInum = self.nameToInum[parent] # is there room in the parent directory? pblock = self.inodes[parentInum].getAddr() if self.data[pblock].getFreeEntries() <= 0: dprint('*** createFile failed: no room in parent directory ***') return -1 # have to make sure file name is unique block = self.inodes[parentInum].getAddr() # print 'is %s in directory %d' % (newfile, block) if self.data[block].dirEntryExists(newfile): dprint('*** createFile failed: not a unique name ***') return -1 # find free inode inum = self.inodeAlloc() if inum == -1: dprint('*** createFile failed: no inodes left ***') return -1 # if a directory, have to allocate directory block for basic (., ..) info fblock = -1 if ftype == 'd': refCnt = 2 fblock = self.dataAlloc() if fblock == -1: dprint('*** createFile failed: no data blocks left ***') self.inodeFree(inum) return -1 else: self.data[fblock].setType('d') self.data[fblock].addDirEntry('.', inum) self.data[fblock].addDirEntry('..', parentInum) else: refCnt = 1 # now ok to init inode properly self.inodes[inum].setAll(ftype, fblock, refCnt) # inc parent ref count if this is a directory if ftype == 'd': self.inodes[parentInum].incRefCnt() # and add to directory of parent self.data[pblock].addDirEntry(newfile, inum) return inum def writeFile(self, tfile, data): inum = self.nameToInum[tfile] curSize = self.inodes[inum].getSize() dprint('writeFile: inum:%d cursize:%d refcnt:%d' % (inum, curSize, self.inodes[inum].getRefCnt())) if curSize == 1: dprint('*** writeFile failed: file is full ***') return -1 fblock = self.dataAlloc() if fblock == -1: dprint('*** writeFile failed: no data blocks left ***') return -1 else: self.data[fblock].setType('f') self.data[fblock].addData(data) self.inodes[inum].setAddr(fblock) if printOps: print('fd=open("%s", O_WRONLY|O_APPEND); write(fd, buf, BLOCKSIZE); close(fd);' % tfile) return 0 def doDelete(self): dprint('doDelete') if len(self.files) == 0: return -1 dfile = self.files[int(random.random() * len(self.files))] dprint('try delete(%s)' % dfile) return self.deleteFile(dfile) def doLink(self): dprint('doLink') if len(self.files) == 0: return -1 parent = self.dirs[int(random.random() * len(self.dirs))] nfile = self.makeName() # pick random target target = self.files[int(random.random() * len(self.files))] # MUST BE A FILE, not a DIRECTORY (this will always be true here) # get full name of newfile if parent == '/': fullName = parent + nfile else: fullName = parent + '/' + nfile dprint('try createLink(%s %s %s)' % (target, nfile, parent)) inum = self.createLink(target, nfile, parent) if inum >= 0: self.files.append(fullName) self.nameToInum[fullName] = inum if printOps: print('link("%s", "%s");' % (target, fullName)) return 0 return -1 def doCreate(self, ftype): dprint('doCreate') parent = self.dirs[int(random.random() * len(self.dirs))] nfile = self.makeName() if ftype == 'd': tlist = self.dirs else: tlist = self.files if parent == '/': fullName = parent + nfile else: fullName = parent + '/' + nfile dprint('try createFile(%s %s %s)' % (parent, nfile, ftype)) inum = self.createFile(parent, nfile, ftype) if inum >= 0: tlist.append(fullName) self.nameToInum[fullName] = inum if parent == '/': parent = '' if ftype == 'd': if printOps: print('mkdir("%s/%s");' % (parent, nfile)) else: if printOps: print('creat("%s/%s");' % (parent, nfile)) return 0 return -1 def doAppend(self): dprint('doAppend') if len(self.files) == 0: return -1 afile = self.files[int(random.random() * len(self.files))] dprint('try writeFile(%s)' % afile) data = chr(ord('a') + int(random.random() * 26)) rc = self.writeFile(afile, data) return rc def pickRandom(self, match): inodes = [] for i in range(len(self.inodes)): if self.inodes[i].getType() == match: inodes.append(i) assert(len(inodes) > 0) return random.choice(inodes) def findFreeData(self): data = [] for i in range(len(self.data)): if self.data[i].getType() == 'free': data.append(i) assert(len(data) > 0) return random.choice(data) # inode bitmap 1111010000000000 # inodes [d a:0 r:3] [f a:-1 r:1] [f a:6 r:4] [f a:-1 r:1] [] [d a:3 r:2] [] [] [] [] [] [] [] [] [] [] # data bitmap 1001001000000000 # data [(.,0) (..,0) (v,2) (d,2) (e,2) (n,2) (s,5)] [] [] [(.,5) (..,0) (w,3) (k,1)] [] [] [t] [] [] [] [] [] [] [] [] [] def corrupt(self, whichCorrupt): random.seed(self.seedCorrupt) num = random.randint(0, 11) # print('RANDINT', num) if whichCorrupt != -1: num = whichCorrupt if self.solve: print('CORRUPTION::', end='') if num == 0: # data bitmap badBit = random.randint(0, self.numData-1) if self.solve: print('DATA BITMAP corrupt bit %d' % badBit) self.dbitmap.corrupt(badBit) elif num == 1: # inode bitmap badBit = random.randint(0, self.numInodes-1) if self.solve: print('INODE BITMAP corrupt bit %d' % badBit) self.ibitmap.corrupt(badBit) elif num == 2 or num == 8: # corrupt live file inode refcnt if num == 2: badInode = self.pickRandom('f') else: badInode = self.pickRandom('d') if random.randint(0, 1) == 0: if self.solve: print('INODE %d refcnt increased' % badInode) self.inodes[badInode].incRefCnt() else: if self.solve: print('INODE %d refcnt decreased' % badInode) self.inodes[badInode].decRefCnt() elif num == 3 or num == 9: # create new fake inode badInode = self.pickRandom('free') if self.solve: print('INODE %d orphan' % badInode) if num == 3: self.inodes[badInode].setAll('f', -1, 1) else: self.inodes[badInode].setAll('d', -1, 1) elif num == 4 or num == 10: # inode switched to point to bad block if num == 4: badInode = self.pickRandom('f') else: badInode = self.pickRandom('d') badData = self.findFreeData() if self.solve: print('INODE %d points to dead block %d' % (badInode, badData)) self.inodes[badInode].setAddr(badData) elif num == 5 or num == 11: # normal inode gets its type switched if num == 5: badInode = self.pickRandom('f') else: badInode = self.pickRandom('d') if self.solve: print('INODE %d was type file, now dir' % badInode) if num == 5: self.inodes[badInode].setType('d') else: self.inodes[badInode].setType('f') elif num == 6: # corrupt a directory block by making one tuple refer to a BAD INODE badInode = self.pickRandom('d') addr = self.inodes[badInode].getAddr() dirList = self.data[addr].getDirEntries() # this is a list of (name, inodeNum) tuples badIndex = random.randint(0, len(dirList)-1) badEntry = dirList[badIndex] badInodeNum = self.ibitmap.findFree() if self.solve: print('INODE %d with directory %s:\n entry (\'%s\', %d) altered to refer to unallocated inode (%d)' % (badInode, dirList, badEntry[0], badEntry[1], badInodeNum)) self.data[addr].setDirEntry(badIndex, (badEntry[0], badInodeNum)) elif num == 7: # corrupt a directory block by making one tuple refer to a DIFFERENT NAME badInode = self.pickRandom('d') addr = self.inodes[badInode].getAddr() dirList = self.data[addr].getDirEntries() # this is a list of (name, inodeNum) tuples badIndex = random.randint(0, len(dirList)-1) badEntry = dirList[badIndex] badName = self.makeName() if self.solve: print('INODE %d with directory %s:\n entry (\'%s\', %d) altered to refer to different name (%s)' % (badInode, dirList, badEntry[0], badEntry[1], badName)) self.data[addr].setDirEntry(badIndex, (badName, badEntry[1])) else: print('No such corruption (%d)' % whichCorrupt) exit(1) return def run(self, numRequests, dontCorrupt, whichCorrupt): self.percentMkdir = 0.40 self.percentWrite = 0.40 self.percentDelete = 0.20 self.numRequests = 20 for i in range(numRequests): rc = -1 while rc == -1: r = random.random() if r < 0.3: rc = self.doAppend() dprint('doAppend rc:%d' % rc) elif r < 0.5: rc = self.doDelete() dprint('doDelete rc:%d' % rc) elif r < 0.7: rc = self.doLink() dprint('doLink rc:%d' % rc) else: if random.random() < 0.75: rc = self.doCreate('f') dprint('doCreate(f) rc:%d' % rc) else: rc = self.doCreate('d') dprint('doCreate(d) rc:%d' % rc) if self.ibitmap.numFree() == 0: print('File system out of inodes; rerun with more via command-line flag?') exit(1) if self.dbitmap.numFree() == 0: print('File system out of data blocks; rerun with more via command-line flag?') exit(1) if self.solve: print('Initial state of file system:\n') self.dump() print('') if not dontCorrupt: self.corrupt(whichCorrupt) if self.solve: print('') print('Final state of file system:\n') self.dump() print('') if not self.solve and not dontCorrupt: print('Can you figure out how the file system was corrupted?\n') if not self.solve and dontCorrupt: print('Can you figure out which files and directories exist?\n') if printFinal: print('\nSummary of files, directories::') print(' Files: ', self.files) print(' Directories:', self.dirs) print('') # # main program # parser = OptionParser() parser.add_option('-s', '--seed', default=0, help='first random seed (for a filesystem)', action='store', type='int', dest='seed') parser.add_option('-S', '--seedCorrupt', default=0, help='second random seed (for corruptions)', action='store', type='int', dest='seedCorrupt') parser.add_option('-i', '--numInodes', default=16, help='number of inodes in file system', action='store', type='int', dest='numInodes') parser.add_option('-d', '--numData', default=16, help='number of data blocks in file system', action='store', type='int', dest='numData') parser.add_option('-n', '--numRequests', default=15, help='number of requests to simulate', action='store', type='int', dest='numRequests') parser.add_option('-p', '--printFinal', default=False, help='print the final set of files/dirs', action='store_true', dest='printFinal') parser.add_option('-w', '--whichCorrupt',default=-1, help='do a specific corruption', action='store', type='int', dest='whichCorrupt') parser.add_option('-c', '--compute', default=False, help='compute answers for me', action='store_true', dest='solve') parser.add_option('-D', '--dontCorrupt', default=False, help='actually corrupt file system', action='store_true', dest='dontCorrupt') (options, args) = parser.parse_args() print('ARG seed', options.seed) print('ARG seedCorrupt', options.seedCorrupt) print('ARG numInodes', options.numInodes) print('ARG numData', options.numData) print('ARG numRequests', options.numRequests) print('ARG printFinal', options.printFinal) print('ARG whichCorrupt',options.whichCorrupt) print('ARG dontCorrupt', options.dontCorrupt) print('') random.seed(options.seed) printState = False printOps = False #if options.solve: # printOps = True # printState = True printFinal = options.printFinal # # have to generate RANDOM requests to the file system # that are VALID! # f = fs(options.numInodes, options.numData, options.seedCorrupt, options.solve) # # ops: mkdir rmdir : create delete : append write # f.run(options.numRequests, options.dontCorrupt, options.whichCorrupt)
Questions
-
First, run
./fsck.py -D
; this flag turns off any corruption, and thus you can use it to generate a random file system, and see if you can determine which files and directories are in there. So, go ahead and do that! Use the-p
flag to see if you were right. Try this for a few different randomly-generated file systems by setting the seed (-s
) to different values, like 1, 2, and 3. -
Now, let’s introduce a corruption. Run
./fsck.py -S 1
to start. Can you see what inconsistency is introduced? How would you fix it in a real file system repair tool? Use-c
to check if you were right. -
Change the seed to
-S 3
or-S 19
; which inconsistency do you see? Use-c
to check your answer. What is different in these two cases? -
Change the seed to
-S 5
; which inconsistency do you see? How hard would it be to fix this problem in an automatic way? Use-c
to check your answer. Then, introduce a similar inconsistency with-S 38
; is this harder/possible to detect? Finally, use-S 642
; is this inconsistency detectable? If so, how would you fix the file system? -
Change the seed to
-S 6
or-S 13
; which inconsistency do you see? Use-c
to check your answer. What is the difference across these two cases? What should the repair tool do when encountering such a situation? -
Change the seed to
-S 9
; which inconsistency do you see? Use-c
to check your answer. Which piece of information should a check- and-repair tool trust in this case? -
Change the seed to
-S 15
; which inconsistency do you see? Use-c
to check your answer. What can a repair tool do in this case? If no repair is possible, how much data is lost? -
Change the seed to
-S 10
; which inconsistency do you see? Use-c
to check your answer. Is there redundancy in the file system structure here that can help a repair? -
Change the seed to
-S 16
and-S 20
; which inconsistency do you see? Use-c
to check your answer. How should the repair tool fix the problem?
Get hands-on with 1400+ tech skills courses.