//
// FileSystemQueryable.cs
//
// Copyright (C) 2004 Novell, Inc.
//
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
//

using System;
using System.Collections;
using System.IO;
using System.Reflection;
using System.Text;
using System.Threading;

using Beagle.Daemon;
using Beagle.Util;

namespace Beagle.Daemon.FileSystemQueryable {

	[QueryableFlavor (Name="Files", Domain=QueryDomain.Local, RequireInotify=false)]
	[PropertyKeywordMapping (Keyword="extension", PropertyName="beagle:FilenameExtension", IsKeyword=true, Description="File extension, e.g. extension:jpeg. Use extension: to search in files with no extension.")]
	[PropertyKeywordMapping (Keyword="ext", PropertyName="beagle:FilenameExtension", IsKeyword=true, Description="File extension, e.g. ext:jpeg. Use ext: to search in files with no extension.")]
	public class FileSystemQueryable : LuceneQueryable {

		static public bool Debug = false;

		private const string SplitFilenamePropKey = "beagle:SplitFilename";
		public const string ExactFilenamePropKey = "beagle:ExactFilename";
		public const string TextFilenamePropKey = "beagle:Filename";
		public const string NoPunctFilenamePropKey = "beagle:NoPunctFilename";
		public const string FilenameExtensionPropKey = "beagle:FilenameExtension";

		public const string ParentDirUriPropKey = LuceneQueryingDriver.PrivateNamespace + "ParentDirUri";
		public const string IsDirectoryPropKey = LuceneQueryingDriver.PrivateNamespace + "IsDirectory";

		// History:
		// 1: Initially set to force a reindex due to NameIndex changes.
		// 2: Overhauled everything to use new lucene infrastructure.
		// 3: Switched to UTC for all times, changed the properties a bit.
		// 4: Changed the key of TextFilenamePropKey to beagle:Filename - it might be useful in clients.
		//    Make SplitFilenamePropKey unstored
		const int MINOR_VERSION = 4;

		private object big_lock = new object ();

		private IFileEventBackend event_backend;

		// This is the task that walks the tree structure
		private TreeCrawlTask tree_crawl_task;

		// This is the task that finds the next place that
		// needs to be crawled in the tree and spawns off
		// the appropriate IndexableGenerator.
		private FileCrawlTask file_crawl_task;

		private ArrayList roots = new ArrayList ();
		private ArrayList roots_by_path = new ArrayList ();

		private FileNameFilter filter;
		// This is just a copy of the LuceneQueryable's QueryingDriver
		// cast into the right type for doing internal->external Uri
		// lookups.
		private LuceneNameResolver name_resolver;

		//////////////////////////////////////////////////////////////////////////

		private Hashtable cached_uid_by_path = new Hashtable ();

		//////////////////////////////////////////////////////////////////////////

		public FileSystemQueryable () : base ("FileSystemIndex", MINOR_VERSION)
		{
			// Set up our event backend
			if (Inotify.Enabled) {
				Logger.Log.Debug ("Starting Inotify Backend");
				event_backend = new InotifyBackend ();
			} else {
				Logger.Log.Debug ("Creating null file event backend");
				event_backend = new NullFileEventBackend ();
			}

			tree_crawl_task = new TreeCrawlTask (new TreeCrawlTask.Handler (AddDirectory));
			tree_crawl_task.Source = this;

			file_crawl_task = new FileCrawlTask (this);
			file_crawl_task.Source = this;

			name_resolver = (LuceneNameResolver) Driver;
			PreloadDirectoryNameInfo ();

			// Set up our file-name filter
			filter = new FileNameFilter (this);

			// Do the right thing when paths expire
			DirectoryModel.ExpireEvent += new DirectoryModel.ExpireHandler (ExpireDirectoryPath);
		}

		override protected IFileAttributesStore BuildFileAttributesStore ()
		{
			return new FileAttributesStore_Mixed (IndexDirectory, IndexFingerprint);
		}

		override protected LuceneQueryingDriver BuildLuceneQueryingDriver (string index_name, int minor_version, bool read_only_mode)
		{
			return new LuceneNameResolver (index_name, minor_version, read_only_mode);
		}

		public FileNameFilter Filter {
			get { return filter; }
		}

		//////////////////////////////////////////////////////////////////////////

		//
		// This is where we build our Indexables
		//
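		// Illustrative example (added commentary, not in the original source):
		// for a hypothetical file named "Summer-Photo.JPG" the properties
		// added below work out to roughly
		//   beagle:ExactFilename      = "Summer-Photo.JPG"
		//   beagle:Filename           = "Summer-Photo"
		//   beagle:NoPunctFilename    = "Summer Photo"
		//   beagle:FilenameExtension  = ".jpg"
		//   beagle:SplitFilename      = the word-broken form produced by StringFu.FuzzyDivide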
		public static void AddStandardPropertiesToIndexable (Indexable indexable, string name, Guid parent_id, bool mutable)
		{
			StringBuilder sb;
			sb = new StringBuilder ();

			string no_ext, ext, no_punct;
			no_ext = Path.GetFileNameWithoutExtension (name);
			ext = Path.GetExtension (name).ToLower ();

			sb.Append (no_ext);
			for (int i = 0; i < sb.Length; ++i)
				if (! Char.IsLetterOrDigit (sb [i]))
					sb [i] = ' ';
			no_punct = sb.ToString ();

			Property prop;

			prop = Property.NewKeyword (ExactFilenamePropKey, name);
			prop.IsMutable = mutable;
			indexable.AddProperty (prop);

			prop = Property.New (TextFilenamePropKey, no_ext);
			prop.IsMutable = mutable;
			indexable.AddProperty (prop);

			prop = Property.New (NoPunctFilenamePropKey, no_punct);
			prop.IsMutable = mutable;
			indexable.AddProperty (prop);

			prop = Property.NewUnsearched (FilenameExtensionPropKey, ext);
			prop.IsMutable = mutable;
			indexable.AddProperty (prop);

			string str;
			str = StringFu.FuzzyDivide (no_ext);
			prop = Property.NewUnstored (SplitFilenamePropKey, str);
			prop.IsMutable = mutable;
			indexable.AddProperty (prop);

			if (parent_id == Guid.Empty)
				return;

			str = GuidFu.ToUriString (parent_id);
			// We use the uri here to recycle terms in the index,
			// since each directory's uri will already be indexed.
			prop = Property.NewUnsearched (ParentDirUriPropKey, str);
			prop.IsMutable = mutable;
			indexable.AddProperty (prop);
		}

		public static void AddStandardPropertiesToIndexable (Indexable indexable, string name, DirectoryModel parent, bool mutable)
		{
			AddStandardPropertiesToIndexable (indexable, name, parent == null ? Guid.Empty : parent.UniqueId, mutable);
			indexable.LocalState ["Parent"] = parent;
		}

		public static Indexable DirectoryToIndexable (string path, Guid id, DirectoryModel parent)
		{
			Indexable indexable;

			try {
				indexable = new Indexable (IndexableType.Add, GuidFu.ToUri (id));
				indexable.MimeType = "inode/directory";
				indexable.NoContent = true;
				indexable.Timestamp = Directory.GetLastWriteTimeUtc (path);
			} catch (IOException) {
				// Looks like the directory was deleted.
				return null;
			}

			string name;
			if (parent == null)
				name = path;
			else
				name = Path.GetFileName (path);

			AddStandardPropertiesToIndexable (indexable, name, parent, true);

			Property prop;
			prop = Property.NewBool (IsDirectoryPropKey, true);
			prop.IsMutable = true; // we want this in the secondary index, for efficiency
			indexable.AddProperty (prop);

			indexable.LocalState ["Path"] = path;

			return indexable;
		}

		public static Indexable FileToIndexable (string path, Guid id, DirectoryModel parent, bool crawl_mode)
		{
			Indexable indexable;

			try {
				indexable = new Indexable (IndexableType.Add, GuidFu.ToUri (id));
				indexable.Timestamp = File.GetLastWriteTimeUtc (path);
				indexable.ContentUri = UriFu.PathToFileUri (path);
				indexable.Crawled = crawl_mode;
				indexable.Filtering = Beagle.IndexableFiltering.Always;
			} catch (IOException) {
				// Looks like the file was deleted.
				return null;
			}

			AddStandardPropertiesToIndexable (indexable, Path.GetFileName (path), parent, true);

			indexable.LocalState ["Path"] = path;

			return indexable;
		}

		private static Indexable NewRenamingIndexable (string name, Guid id, DirectoryModel parent, string last_known_path)
		{
			Indexable indexable;
			indexable = new Indexable (IndexableType.PropertyChange, GuidFu.ToUri (id));

			AddStandardPropertiesToIndexable (indexable, name, parent, true);

			indexable.LocalState ["Id"] = id;
			indexable.LocalState ["LastKnownPath"] = last_known_path;

			return indexable;
		}

		//////////////////////////////////////////////////////////////////////////

		//
		// Mapping from directory ids to paths
		//

		private Hashtable dir_models_by_id = new Hashtable ();
		private Hashtable name_info_by_id = new Hashtable ();

		// We fall back to using the name information in the index
		// until we've fully constructed our set of DirectoryModels.
		private void PreloadDirectoryNameInfo ()
		{
			ICollection all;
			all = name_resolver.GetAllDirectoryNameInfo ();
			foreach (LuceneNameResolver.NameInfo info in all)
				name_info_by_id [info.Id] = info;
		}

		// This only works for directories.
		private string UniqueIdToDirectoryName (Guid id)
		{
			DirectoryModel dir;
			dir = dir_models_by_id [id] as DirectoryModel;
			if (dir != null)
				return dir.FullName;

			LuceneNameResolver.NameInfo info;
			info = name_info_by_id [id] as LuceneNameResolver.NameInfo;
			if (info != null) {
				if (info.ParentId == Guid.Empty) // i.e. this is a root
					return info.Name;
				else {
					string parent_name;
					parent_name = UniqueIdToDirectoryName (info.ParentId);
					if (parent_name == null)
						return null;
					return Path.Combine (parent_name, info.Name);
				}
			}

			return null;
		}

		private void CacheDirectoryNameChange (Guid id, Guid new_parent_id, string new_name)
		{
			LuceneNameResolver.NameInfo info;
			info = name_info_by_id [id] as LuceneNameResolver.NameInfo;
			if (info != null) {
				info.ParentId = new_parent_id;
				info.Name = new_name;
			}
		}
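		// A note on the lookup strategy (added commentary): directory paths
		// are resolved by walking parent ids up to a root, first through the
		// in-memory DirectoryModels and then through the NameInfo records
		// preloaded from the index.  For example, a hypothetical id whose
		// NameInfo chain is "docs" under a root named "/home/user/work"
		// resolves to "/home/user/work/docs".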
		private string ToFullPath (string name, Guid parent_id)
		{
			// This is the correct behavior for roots.
			if (parent_id == Guid.Empty)
				return name;

			string parent_name;
			parent_name = UniqueIdToDirectoryName (parent_id);
			if (parent_name == null)
				return null;

			return Path.Combine (parent_name, name);
		}

		// This works for both files and directories.
		private string UniqueIdToFullPath (Guid id)
		{
			// First, check if it is a directory.
			string path;
			path = UniqueIdToDirectoryName (id);
			if (path != null)
				return path;

			// If not, try to pull name information out of the index.
			LuceneNameResolver.NameInfo info;
			info = name_resolver.GetNameInfoById (id);
			if (info == null)
				return null;

			return ToFullPath (info.Name, info.ParentId);
		}

		private void RegisterId (string name, DirectoryModel dir, Guid id)
		{
			cached_uid_by_path [Path.Combine (dir.FullName, name)] = id;
		}

		private void ForgetId (string path)
		{
			cached_uid_by_path.Remove (path);
		}

		// This works for files.  (It probably works for directories
		// too, but you should use one of the more efficient means
		// above if you know it is a directory.)
		private Guid NameAndParentToId (string name, DirectoryModel dir)
		{
			string path;
			path = Path.Combine (dir.FullName, name);

			Guid unique_id;
			if (cached_uid_by_path.Contains (path))
				unique_id = (Guid) cached_uid_by_path [path];
			else
				unique_id = name_resolver.GetIdByNameAndParentId (name, dir.UniqueId);

			return unique_id;
		}

		//////////////////////////////////////////////////////////////////////////

		//
		// Directory-related methods
		//

		private Hashtable dir_models_by_path = new Hashtable ();

		private DirectoryModel GetDirectoryModelByPath (string path)
		{
			DirectoryModel dir;

			lock (dir_models_by_path) {
				dir = dir_models_by_path [path] as DirectoryModel;
				if (dir != null)
					return dir;
			}

			// Walk each root until we find the correct path
			foreach (DirectoryModel root in roots) {
				dir = root.WalkTree (path);
				if (dir != null) {
					lock (dir_models_by_path)
						dir_models_by_path [path] = dir;
					break;
				}
			}

			return dir;
		}

		private void ExpireDirectoryPath (string expired_path, Guid unique_id)
		{
			if (Debug)
				Logger.Log.Debug ("Expired '{0}'", expired_path);
			lock (dir_models_by_path)
				dir_models_by_path.Remove (expired_path);
		}

		public void AddDirectory (DirectoryModel parent, string name)
		{
			// Ignore the stuff we want to ignore.
			if (filter.Ignore (parent, name, true))
				return;

			// FIXME: ! parent.HasChildWithName (name)
			if (parent != null && parent.HasChildWithName (name))
				return;

			string path;
			path = (parent == null) ? name : Path.Combine (parent.FullName, name);

			if (Debug)
				Logger.Log.Debug ("Adding directory '{0}'", path);

			if (! Directory.Exists (path)) {
				Logger.Log.Error ("Can't add directory: '{0}' does not exist", path);
				return;
			}

			FileAttributes attr;
			attr = FileAttributesStore.Read (path);

			// Note that we don't look at the mtime of a directory when
			// deciding whether or not to index it.
			bool needs_indexing = false;

			if (attr == null) {
				// If it has no attributes, it definitely needs
				// indexing.
				needs_indexing = true;
			} else {
				// Make sure that it still has the same name as before.
				// If not, we need to re-index it.
				// We can do this since we preloaded all of the directory
				// name info via PreloadDirectoryNameInfo.
				string last_known_name;
				last_known_name = UniqueIdToDirectoryName (attr.UniqueId);
				if (last_known_name != path) {
					Logger.Log.Debug ("'{0}' now seems to be called '{1}'", last_known_name, path);
					needs_indexing = true;
				}
			}

			// If we can't descend into this directory, we want to
			// index it but not build a DirectoryModel for it.
			// FIXME: We should do the right thing when a
			// directory's permissions change.
			bool is_walkable;
			is_walkable = DirectoryWalker.IsWalkable (path);
			if (! is_walkable)
				Logger.Log.Debug ("Can't walk '{0}'", path);

			if (needs_indexing)
				ScheduleDirectory (name, parent, attr, is_walkable);
			else if (is_walkable)
				RegisterDirectory (name, parent, attr);
		}
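		// Commentary (not in the original source): roots come from the
		// indexing configuration (see LoadConfiguration below).  A root is
		// just a directory added with a null parent, so its "name" is the
		// full sanitized path.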
		public void AddRoot (string path)
		{
			path = StringFu.SanitizePath (path);
			Logger.Log.Debug ("Adding root: {0}", path);

			if (roots_by_path.Contains (path)) {
				Logger.Log.Error ("Trying to add an existing root: {0}", path);
				return;
			}

			// We need to have the path in roots_by_path for the
			// filtering to work as we'd like before the root is
			// actually added.
			roots_by_path.Add (path);

			AddDirectory (null, path);
		}

		public void RemoveRoot (string path)
		{
			Logger.Log.Debug ("Removing root: {0}", path);

			if (! roots_by_path.Contains (path)) {
				Logger.Log.Error ("Trying to remove a non-existing root: {0}", path);
				return;
			}

			// Find our directory model for the root
			DirectoryModel dir;
			dir = GetDirectoryModelByPath (path);

			if (dir == null) {
				Logger.Log.Error ("Could not find directory-model for root: {0}", path);
				return;
			}

			// FIXME: Make sure we're emptying the crawler task of any sub-directories
			// of the root we're removing.  It's not a big deal since we do an Ignore-check
			// in there, but it would be nice.

			roots_by_path.Remove (path);
			roots.Remove (dir);

			// Clean out the root from our directory cache.
			RemoveDirectory (dir);
		}

		private void ScheduleDirectory (string name, DirectoryModel parent, FileAttributes attr, bool is_walkable)
		{
			string path;
			path = (parent == null) ? name : Path.Combine (parent.FullName, name);

			Guid id;
			id = (attr == null) ? Guid.NewGuid () : attr.UniqueId;

			DateTime last_crawl;
			last_crawl = (attr == null) ? DateTime.MinValue : attr.LastWriteTime;

			Indexable indexable;
			indexable = DirectoryToIndexable (path, id, parent);

			if (indexable != null) {
				indexable.LocalState ["Name"] = name;
				indexable.LocalState ["LastCrawl"] = last_crawl;
				indexable.LocalState ["IsWalkable"] = is_walkable;

				Scheduler.Task task;
				task = NewAddTask (indexable);
				task.Priority = Scheduler.Priority.Delayed;
				ThisScheduler.Add (task);
			}
		}

		private bool RegisterDirectory (string name, DirectoryModel parent, FileAttributes attr)
		{
			string path;
			path = (parent == null) ? name : Path.Combine (parent.FullName, name);

			if (Debug)
				Logger.Log.Debug ("Registered directory '{0}' ({1})", path, attr.UniqueId);

			DateTime mtime;
			try {
				mtime = Directory.GetLastWriteTimeUtc (path);
			} catch (IOException) {
				Log.Debug ("Directory '{0}' ({1}) appears to have gone away", path, attr.UniqueId);
				return false;
			}

			DirectoryModel dir;
			if (parent == null)
				dir = DirectoryModel.NewRoot (big_lock, path, attr);
			else
				dir = parent.AddChild (name, attr);

			if (mtime > attr.LastWriteTime) {
				dir.State = DirectoryState.Dirty;
				if (Debug)
					Logger.Log.Debug ("'{0}' is dirty", path);
			}

			if (Debug) {
				if (dir.IsRoot)
					Logger.Log.Debug ("Created model '{0}'", dir.FullName);
				else
					Logger.Log.Debug ("Created model '{0}' with parent '{1}'", dir.FullName, dir.Parent.FullName);
			}

			// Add any roots we create to the list of roots
			if (dir.IsRoot)
				roots.Add (dir);

			// Add the directory to our by-id hash, and remove any NameInfo
			// we might have cached about it.
			dir_models_by_id [dir.UniqueId] = dir;
			name_info_by_id.Remove (dir.UniqueId);

			// Start watching the directory.
			dir.WatchHandle = event_backend.CreateWatch (path);

			// Schedule this directory for crawling.
			if (tree_crawl_task.Add (dir))
				ThisScheduler.Add (tree_crawl_task);

			// Make sure that our file crawling task is active,
			// since presumably we now have something new to crawl.
			ActivateFileCrawling ();

			return true;
		}
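		// Commentary (added): a directory reaches RegisterDirectory either
		// directly from AddDirectory, when its index record is already up to
		// date, or later from PostAddHook, after ScheduleDirectory's delayed
		// add task has been indexed.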
		private void ForgetDirectoryRecursively (DirectoryModel dir)
		{
			foreach (DirectoryModel child in dir.Children)
				ForgetDirectoryRecursively (child);

			if (dir.WatchHandle != null)
				event_backend.ForgetWatch (dir.WatchHandle);
			dir_models_by_id.Remove (dir.UniqueId);
			// We rely on the expire event to remove it from dir_models_by_path
		}

		private void RemoveDirectory (DirectoryModel dir)
		{
			Uri uri;
			uri = GuidFu.ToUri (dir.UniqueId);

			Indexable indexable;
			indexable = new Indexable (IndexableType.Remove, uri);

			// Remember a copy of our external Uri, so that we can
			// easily remap it in the PostRemoveHook.
			indexable.LocalState ["RemovedUri"] = UriFu.PathToFileUri (dir.FullName);

			// Forget watches and internal references
			ForgetDirectoryRecursively (dir);

			// Calling Remove will expire the path names,
			// so name caches will be cleaned up accordingly.
			dir.Remove ();

			Scheduler.Task task;
			task = NewAddTask (indexable); // We *add* the indexable to *remove* the index item
			task.Priority = Scheduler.Priority.Immediate;
			ThisScheduler.Add (task);
		}

		public void RemoveDirectory (string path)
		{
			DirectoryModel dir = GetDirectoryModelByPath (path);
			if (dir != null)
				RemoveDirectory (dir);
		}

		private void MoveDirectory (DirectoryModel dir,
					    DirectoryModel new_parent, // or null if we are just renaming
					    string new_name)
		{
			if (dir == null) {
				Logger.Log.Warn ("Couldn't find DirectoryModel for directory moving to '{0}' in '{1}', so it was hopefully never there.", new_name, new_parent.FullName);
				AddDirectory (new_parent, new_name);
				return;
			}

			if (dir.IsRoot)
				throw new Exception ("Can't move root " + dir.FullName);

			// We'll need this later in order to generate the
			// right change notification.
			string old_path;
			old_path = dir.FullName;

			if (new_parent != null && new_parent != dir.Parent)
				dir.MoveTo (new_parent, new_name);
			else
				dir.Name = new_name;

			// Remember this by path
			lock (dir_models_by_path)
				dir_models_by_path [dir.FullName] = dir;

			CacheDirectoryNameChange (dir.UniqueId, dir.Parent.UniqueId, new_name);

			Indexable indexable;
			indexable = NewRenamingIndexable (new_name,
							  dir.UniqueId,
							  dir.Parent, // == new_parent
							  old_path);
			indexable.LocalState ["OurDirectoryModel"] = dir;

			Scheduler.Task task;
			task = NewAddTask (indexable);
			task.Priority = Scheduler.Priority.Immediate;
			// Danger Will Robinson!
			// We need to use BlockUntilNoCollision to get the correct notifications
			// in a mv a b; mv b c; mv c a situation.
			// FIXME: And now that type no longer exists!
			ThisScheduler.Add (task);
		}

		//////////////////////////////////////////////////////////////////////////

		//
		// This code controls the directory crawl order
		//

		private DirectoryModel StupidWalk (DirectoryModel prev_best, DirectoryModel contender)
		{
			if (contender.NeedsCrawl) {
				if (prev_best == null || prev_best.CompareTo (contender) < 0)
					prev_best = contender;
			}

			foreach (DirectoryModel child in contender.Children)
				prev_best = StupidWalk (prev_best, child);

			return prev_best;
		}

		public DirectoryModel GetNextDirectoryToCrawl ()
		{
			DirectoryModel next_dir = null;

			foreach (DirectoryModel root in roots)
				next_dir = StupidWalk (next_dir, root);

			return next_dir;
		}
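		// Commentary (added, describing the presumed contract): the
		// FileCrawlTask asks for work via GetNextDirectoryToCrawl and, when
		// it finishes a directory, reports back through
		// DoneCrawlingOneDirectory or MarkDirectoryAsUncrawlable so the
		// crawl bookkeeping stays consistent.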
		public void DoneCrawlingOneDirectory (DirectoryModel dir)
		{
			if (! dir.IsAttached)
				return;

			FileAttributes attr;
			attr = FileAttributesStore.Read (dir.FullName);

			// Don't mark ourselves; let the crawler redo us
			if (attr == null)
				return;

			// We don't have to be super-careful about this since
			// we only use the FileAttributes mtime on a directory
			// to determine its initial state, not whether or not
			// its index record is up-to-date.
			attr.LastWriteTime = DateTime.UtcNow;

			// ...but we do use this to decide which order directories get
			// crawled in.
			dir.LastCrawlTime = DateTime.UtcNow;

			FileAttributesStore.Write (attr);
			dir.MarkAsClean ();
		}

		public void MarkDirectoryAsUncrawlable (DirectoryModel dir)
		{
			if (! dir.IsAttached)
				return;

			// If we managed to set up a watch on this directory,
			// drop it.
			if (dir.WatchHandle != null) {
				event_backend.ForgetWatch (dir.WatchHandle);
				dir.WatchHandle = null;
			}

			dir.MarkAsUncrawlable ();
		}

		public void Recrawl (string path)
		{
			// Try to find a directory model for the path specified
			// so that we can re-crawl it.
			DirectoryModel dir;
			dir = GetDirectoryModelByPath (path);

			bool path_is_registered = true;

			if (dir == null) {
				dir = GetDirectoryModelByPath (FileSystem.GetDirectoryNameRootOk (path));
				path_is_registered = false;

				if (dir == null) {
					Logger.Log.Debug ("Unable to get directory-model for path: {0}", path);
					return;
				}
			}

			Logger.Log.Debug ("Re-crawling {0}", dir.FullName);

			if (tree_crawl_task.Add (dir))
				ThisScheduler.Add (tree_crawl_task);

			if (path_is_registered)
				Recrawl_Recursive (dir, DirectoryState.PossiblyClean);

			ActivateFileCrawling ();
			ActivateDirectoryCrawling ();
		}

		public void RecrawlEverything ()
		{
			Logger.Log.Debug ("Re-crawling all directories");

			foreach (DirectoryModel root in roots)
				Recrawl_Recursive (root, DirectoryState.PossiblyClean);

			ActivateFileCrawling ();
			ActivateDirectoryCrawling ();
		}

		private void Recrawl_Recursive (DirectoryModel dir, DirectoryState state)
		{
			dir.State = state;
			tree_crawl_task.Add (dir);
			foreach (DirectoryModel sub_dir in dir.Children)
				Recrawl_Recursive (sub_dir, state);
		}

		private void ActivateFileCrawling ()
		{
			if (! file_crawl_task.IsActive)
				ThisScheduler.Add (file_crawl_task);
		}

		private void ActivateDirectoryCrawling ()
		{
			if (! tree_crawl_task.IsActive)
				ThisScheduler.Add (tree_crawl_task);
		}

		//////////////////////////////////////////////////////////////////////////

		//
		// File-related methods
		//

		private enum RequiredAction {
			None,
			Index,
			Rename,
			Forget
		}

		static DateTime epoch = new DateTime (1970, 1, 1, 0, 0, 0);

		static DateTime ToDateTimeUtc (long time_t)
		{
			return epoch.AddSeconds (time_t);
		}
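		// Summary (added commentary): DetermineRequiredAction decides what to
		// do with a single file during a crawl:
		//   Forget - the file is now ignored but still carries attributes
		//   Index  - no attributes, a newer filter is available, or the mtime changed
		//   Rename - the ctime changed and the path no longer matches the
		//            last known path for that unique id
		//   None   - nothing relevant changed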
		private RequiredAction DetermineRequiredAction (DirectoryModel dir, string name, FileAttributes attr, out string last_known_path)
		{
			last_known_path = null;

			string path;
			path = Path.Combine (dir.FullName, name);

			if (Debug)
				Logger.Log.Debug ("*** What should we do with {0}?", path);

			if (filter.Ignore (dir, name, false)) {
				// If there are attributes on the file, we must have indexed
				// it previously.  Since we are ignoring it now, we should strip
				// any file attributes from it.
				if (attr != null) {
					if (Debug)
						Logger.Log.Debug ("*** Forget it: File is ignored but has attributes");
					return RequiredAction.Forget;
				}
				if (Debug)
					Logger.Log.Debug ("*** Do nothing: File is ignored");
				return RequiredAction.None;
			}

			if (attr == null) {
				if (Debug)
					Logger.Log.Debug ("*** Index it: File has no attributes");
				return RequiredAction.Index;
			}

			// FIXME: This does not take into account that we might have a better matching filter to use now.
			// That, however, is kind of expensive to figure out since we'd have to do mime-sniffing and shit.
			if (attr.FilterName != null && attr.FilterVersion > 0) {
				int current_filter_version;
				current_filter_version = FilterFactory.GetFilterVersion (attr.FilterName);

				if (current_filter_version > attr.FilterVersion) {
					if (Debug)
						Logger.Log.Debug ("*** Index it: Newer filter version found for filter {0}", attr.FilterName);
					return RequiredAction.Index;
				}
			}

			Mono.Unix.Native.Stat stat;
			try {
				Mono.Unix.Native.Syscall.stat (path, out stat);
			} catch (Exception ex) {
				Logger.Log.Debug ("Caught exception stat-ing {0}", path);
				Logger.Log.Debug (ex);
				return RequiredAction.None;
			}

			DateTime last_write_time, last_attr_time;
			last_write_time = ToDateTimeUtc (stat.st_mtime);
			last_attr_time = ToDateTimeUtc (stat.st_ctime);

			if (attr.LastWriteTime != last_write_time) {
				if (Debug)
					Logger.Log.Debug ("*** Index it: MTime has changed ({0} vs {1})", attr.LastWriteTime, last_write_time);

				// If the file has been copied, it will have the
				// original file's EAs.  Thus we have to check to
				// make sure that the unique id in the EAs actually
				// belongs to this file.  If not, replace it with a new one.
				// (Thus touching & then immediately renaming a file can
				// cause its unique id to change, which is less than
				// optimal but probably can't be helped.)
				last_known_path = UniqueIdToFullPath (attr.UniqueId);
				if (path != last_known_path) {
					if (Debug)
						Logger.Log.Debug ("*** Name has also changed, assigning new unique id");
					attr.UniqueId = Guid.NewGuid ();
				}

				return RequiredAction.Index;
			}

			// If the inode ctime is newer than the last time we set
			// file attributes, we might have been moved.  We don't
			// strictly compare times due to the fact that although
			// setting xattrs changes the ctime, if we don't have write
			// access our metadata will be stored in sqlite, and the
			// ctime will be at some point in the past.
			if (attr.LastAttrTime < last_attr_time) {
				if (Debug)
					Logger.Log.Debug ("*** CTime is newer, checking last known path ({0} vs {1})", attr.LastAttrTime, last_attr_time);

				last_known_path = UniqueIdToFullPath (attr.UniqueId);

				if (last_known_path == null) {
					if (Debug)
						Logger.Log.Debug ("*** Index it: CTime has changed, but can't determine last known path");
					return RequiredAction.Index;
				}

				// If the name has changed but the mtime
				// hasn't, the only logical conclusion is that
				// the file has been renamed.
				if (path != last_known_path) {
					if (Debug)
						Logger.Log.Debug ("*** Rename it: CTime and path have changed");
					return RequiredAction.Rename;
				}
			}

			// We don't have to do anything, which is always preferable.
			if (Debug)
				Logger.Log.Debug ("*** Do nothing");
			return RequiredAction.None;
		}
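		// Commentary (added): note that a Forget action never produces an
		// indexable; in GetCrawlingFileIndexable below the attributes are
		// simply dropped and null is returned, which the caller treats as
		// "nothing to schedule".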
		// Return an indexable that will do the right thing with a file
		// (or null, if the right thing is to do nothing)
		public Indexable GetCrawlingFileIndexable (DirectoryModel dir, string name)
		{
			string path;
			path = Path.Combine (dir.FullName, name);

			FileAttributes attr;
			attr = FileAttributesStore.Read (path);

			RequiredAction action;
			string last_known_path;
			action = DetermineRequiredAction (dir, name, attr, out last_known_path);

			if (action == RequiredAction.None)
				return null;

			Guid unique_id;
			if (attr != null)
				unique_id = attr.UniqueId;
			else
				unique_id = Guid.NewGuid ();

			Indexable indexable = null;

			switch (action) {

			case RequiredAction.Index:
				indexable = FileToIndexable (path, unique_id, dir, true);
				break;

			case RequiredAction.Rename:
				indexable = NewRenamingIndexable (name, unique_id, dir, last_known_path);
				break;

			case RequiredAction.Forget:
				FileAttributesStore.Drop (path);
				break;
			}

			return indexable;
		}

		public void AddFile (DirectoryModel dir, string name)
		{
			string path;
			path = Path.Combine (dir.FullName, name);

			if (! File.Exists (path))
				return;

			if (FileSystem.IsSpecialFile (path))
				return;

			if (filter.Ignore (dir, name, false))
				return;

			// If this file already has extended attributes,
			// make sure that the name matches the file
			// that is in the index.  If not, it could be
			// a copy of an already-indexed file and should
			// be assigned a new unique id.
			Guid unique_id = Guid.Empty;
			FileAttributes attr;
			attr = FileAttributesStore.Read (path);

			if (attr != null) {
				LuceneNameResolver.NameInfo info;
				info = name_resolver.GetNameInfoById (attr.UniqueId);
				if (info != null && info.Name == name && info.ParentId == dir.UniqueId)
					unique_id = attr.UniqueId;
			}

			if (unique_id == Guid.Empty)
				unique_id = Guid.NewGuid ();

			RegisterId (name, dir, unique_id);

			Indexable indexable;
			indexable = FileToIndexable (path, unique_id, dir, false);

			if (indexable != null) {
				Scheduler.Task task;
				task = NewAddTask (indexable);
				task.Priority = Scheduler.Priority.Immediate;
				ThisScheduler.Add (task);
			}
		}

		public void RemoveFile (DirectoryModel dir, string name)
		{
			// FIXME: We might as well remove it, even if it was being ignored.
			// Right?

			Guid unique_id;
			unique_id = NameAndParentToId (name, dir);

			if (unique_id == Guid.Empty) {
				Logger.Log.Info ("Could not resolve unique id of '{0}' in '{1}' for removal, it is probably already gone", name, dir.FullName);
				return;
			}

			Uri uri, file_uri;
			uri = GuidFu.ToUri (unique_id);
			file_uri = UriFu.PathToFileUri (Path.Combine (dir.FullName, name));

			Indexable indexable;
			indexable = new Indexable (IndexableType.Remove, uri);
			indexable.LocalState ["RemovedUri"] = file_uri;

			Scheduler.Task task;
			task = NewAddTask (indexable);
			task.Priority = Scheduler.Priority.Immediate;
			ThisScheduler.Add (task);
		}
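		// Commentary (added): a rename can change a file's ignore status (for
		// example, a hypothetical backup name like "notes.txt~" being filtered
		// while "notes.txt" is not), so MoveFile below synthesizes an Add or
		// Remove when the old and new names fall on different sides of the
		// filter.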
		public void MoveFile (DirectoryModel old_dir, string old_name, DirectoryModel new_dir, string new_name)
		{
			bool old_ignore, new_ignore;
			old_ignore = filter.Ignore (old_dir, old_name, false);
			new_ignore = filter.Ignore (new_dir, new_name, false);

			if (old_ignore && new_ignore)
				return;

			// If our ignore-state is changing, synthesize the appropriate
			// action.
			if (old_ignore && ! new_ignore) {
				AddFile (new_dir, new_name);
				return;
			}

			if (! old_ignore && new_ignore) {
				RemoveFile (new_dir, new_name);
				return;
			}

			// We need to find the file's unique id.
			// We can't look at the extended attributes w/o making
			// assumptions about whether they follow around the
			// file (EAs) or the path (sqlite)...
			Guid unique_id;
			unique_id = NameAndParentToId (old_name, old_dir);

			if (unique_id == Guid.Empty) {
				// If we can't find the unique ID, we have to
				// assume that the original file never made it
				// into the index --- thus we treat this as
				// an Add.
				AddFile (new_dir, new_name);
				return;
			}

			RegisterId (new_name, new_dir, unique_id);

			string old_path;
			old_path = Path.Combine (old_dir.FullName, old_name);

			ForgetId (old_path);

			// FIXME: I think we need to be more conservative when we see
			// events in a directory that has not been fully scanned, just to
			// avoid races.  i.e. what if we are in the middle of crawling that
			// directory and haven't reached this file yet?  Then the rename
			// will fail.
			Indexable indexable;
			indexable = NewRenamingIndexable (new_name, unique_id, new_dir, old_path);

			Scheduler.Task task;
			task = NewAddTask (indexable);
			task.Priority = Scheduler.Priority.Immediate;
			// Danger Will Robinson!
			// We need to use BlockUntilNoCollision to get the correct notifications
			// in a mv a b; mv b c; mv c a situation.
			// FIXME: And now AddType no longer exists
			ThisScheduler.Add (task);
		}

		//////////////////////////////////////////////////////////////////////////

		// Configuration stuff

		public IList Roots {
			get { return roots_by_path; }
		}

		private void LoadConfiguration ()
		{
			if (Conf.Indexing.IndexHomeDir)
				AddRoot (PathFinder.HomeDir);

			foreach (string root in Conf.Indexing.Roots)
				AddRoot (root);

			Conf.Subscribe (typeof (Conf.IndexingConfig), OnConfigurationChanged);
		}

		private void OnConfigurationChanged (Conf.Section section)
		{
			ArrayList roots_wanted = new ArrayList (Conf.Indexing.Roots);

			if (Conf.Indexing.IndexHomeDir)
				roots_wanted.Add (PathFinder.HomeDir);

			IList roots_to_add, roots_to_remove;
			ArrayFu.IntersectListChanges (roots_wanted, Roots, out roots_to_add, out roots_to_remove);

			foreach (string root in roots_to_remove)
				RemoveRoot (root);

			foreach (string root in roots_to_add)
				AddRoot (root);
		}

		//////////////////////////////////////////////////////////////////////////

		//
		// Our magic LuceneQueryable hooks
		//

		override protected bool PreChildAddHook (Indexable child)
		{
			// FIXME: Handling Uri remapping of children is tricky, and there
			// is also the issue of properly serializing file: uris that
			// contain fragments.  For now we just punt it all by dropping
			// any child indexables of file system objects.
			return false;
		}
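		// Commentary (added): items live in the index under their uid-based
		// internal Uris (GuidFu.ToUri), while clients see file: Uris.
		// PostAddHook, PostRemoveHook and HitFilter below handle the
		// remapping in both directions.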
		override protected void PostAddHook (Indexable indexable, IndexerAddedReceipt receipt)
		{
			// If we just changed properties, remap to our *old* external Uri
			// to make notification work out properly.
			if (indexable.Type == IndexableType.PropertyChange) {
				string last_known_path;
				last_known_path = (string) indexable.LocalState ["LastKnownPath"];
				receipt.Uri = UriFu.PathToFileUri (last_known_path);
				Logger.Log.Debug ("Last known path is {0}", last_known_path);

				// This rename is now in the index, so we no longer need to keep
				// track of the uid in memory.
				ForgetId (last_known_path);
				return;
			}

			string path;
			path = (string) indexable.LocalState ["Path"];
			ForgetId (path);

			DirectoryModel parent;
			parent = indexable.LocalState ["Parent"] as DirectoryModel;

			// The parent directory might have run away since we were indexed
			if (parent != null && ! parent.IsAttached)
				return;

			Guid unique_id;
			unique_id = GuidFu.FromUri (receipt.Uri);

			FileAttributes attr;
			attr = FileAttributesStore.ReadOrCreate (path, unique_id);
			attr.Path = path;
			attr.LastWriteTime = indexable.Timestamp;

			attr.FilterName = receipt.FilterName;
			attr.FilterVersion = receipt.FilterVersion;

			if (indexable.LocalState ["IsWalkable"] != null) {
				string name;
				name = (string) indexable.LocalState ["Name"];

				if (! RegisterDirectory (name, parent, attr))
					return;
			}

			FileAttributesStore.Write (attr);

			// Remap the Uri so that change notification will work properly
			receipt.Uri = UriFu.PathToFileUri (path);
		}

		override protected void PostRemoveHook (Indexable indexable, IndexerRemovedReceipt receipt)
		{
			// Find the cached external Uri and remap the Uri in the receipt.
			// We have to do this to make change notification work.
			Uri external_uri;
			external_uri = indexable.LocalState ["RemovedUri"] as Uri;
			if (external_uri == null)
				throw new Exception ("No cached external Uri for " + receipt.Uri);

			receipt.Uri = external_uri;
			ForgetId (external_uri.LocalPath);
		}

		private bool RemapUri (Hit hit)
		{
			// Store the hit's internal uri in a property
			Property prop;
			prop = Property.NewUnsearched ("beagle:InternalUri", UriFu.UriToSerializableString (hit.Uri));
			hit.AddProperty (prop);

			// Now assemble the path by looking at the parent and name
			string name, path;
			name = hit [ExactFilenamePropKey];
			if (name == null) {
				// If we don't have the filename property, we have to do a lookup
				// based on the guid.  This happens with synthetic hits produced by
				// index listeners.
				Guid hit_id;
				hit_id = GuidFu.FromUri (hit.Uri);
				path = UniqueIdToFullPath (hit_id);
			} else {
				string parent_id_uri;
				parent_id_uri = hit [ParentDirUriPropKey];
				if (parent_id_uri == null)
					return false;

				Guid parent_id;
				parent_id = GuidFu.FromUriString (parent_id_uri);

				path = ToFullPath (name, parent_id);
				if (path == null)
					Logger.Log.Debug ("Couldn't find path of file with name '{0}' and parent '{1}'", name, GuidFu.ToShortString (parent_id));
			}

			if (path != null) {
				hit.Uri = UriFu.PathToFileUri (path);
				return true;
			}

			return false;
		}
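		// Commentary (added): RemapUri rewrites a hit's internal uid-based
		// Uri into the file: Uri a client expects, keeping the original
		// around in the beagle:InternalUri property so that GetSnippet can
		// still find the text-cache entry.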
		// Hit filter: this handles our mapping from internal->external uris,
		// and checks to see if the file is still there.
		override protected bool HitFilter (Hit hit)
		{
			Uri old_uri = hit.Uri;

			if (! RemapUri (hit))
				return false;

			string path;
			path = hit.Uri.LocalPath;

			bool is_directory;
			bool exists = false;

			is_directory = hit.MimeType == "inode/directory";

			if (hit.MimeType == null && hit.Uri.IsFile && Directory.Exists (path)) {
				is_directory = true;
				exists = true;
			}

			if (! exists) {
				if (is_directory)
					exists = Directory.Exists (path);
				else
					exists = File.Exists (path);
			}

			// If the file doesn't exist, we do not schedule a removal and
			// return false.  This is to avoid "losing" files if they are
			// in a directory that has been renamed but which we haven't
			// scanned yet... if we dropped them from the index, they would
			// never get re-indexed (or at least not until the next time they
			// were touched) since they would still be stamped with EAs
			// indicating they were up-to-date.  And that would be bad.
			// FIXME: It would be safe if we were in a known state, right?
			// i.e. every DirectoryModel is clean.
			if (! exists)
				return false;

			// Fetch the parent directory model from our cache to do clever
			// filtering to determine if we're ignoring it or not.
			DirectoryModel parent;
			parent = GetDirectoryModelByPath (Path.GetDirectoryName (path));

			// Check the ignore status of the hit
			if (filter.Ignore (parent, Path.GetFileName (path), is_directory))
				return false;

			return true;
		}

		override public string GetSnippet (string [] query_terms, Hit hit)
		{
			// Uri remapping from a hit is easy: the internal uri
			// is stored in a property.
			Uri uri = UriFu.UriStringToUri (hit ["beagle:InternalUri"]);

			string path = TextCache.UserCache.LookupPathRaw (uri);

			if (path == null)
				return null;

			// If this is self-cached, use the remapped Uri
			if (path == TextCache.SELF_CACHE_TAG)
				return SnippetFu.GetSnippetFromFile (query_terms, hit.Uri.LocalPath);

			return SnippetFu.GetSnippetFromTextCache (query_terms, path);
		}

		override public void Start ()
		{
			base.Start ();

			event_backend.Start (this);

			LoadConfiguration ();

			Logger.Log.Debug ("Done starting FileSystemQueryable");
		}

		//////////////////////////////////////////////////////////////////////////

		// These are the methods that the IFileEventBackend implementations should
		// call in response to events.

		public void ReportEventInDirectory (string directory_name)
		{
			DirectoryModel dir;
			dir = GetDirectoryModelByPath (directory_name);

			// If something goes wrong, just fail silently.
			if (dir == null)
				return;

			// We only use this information to prioritize the order in which
			// we crawl directories --- so if this directory doesn't
			// actually need to be crawled, we can safely ignore it.
			if (! dir.NeedsCrawl)
				return;

			dir.LastActivityTime = DateTime.Now;

			Logger.Log.Debug ("Saw event in '{0}'", directory_name);
		}

		public void HandleAddEvent (string directory_name, string file_name, bool is_directory)
		{
			Logger.Log.Debug ("*** Add '{0}' '{1}' {2}", directory_name, file_name, is_directory ? "(dir)" : "(file)");

			DirectoryModel dir;
			dir = GetDirectoryModelByPath (directory_name);
			if (dir == null) {
				Logger.Log.Warn ("HandleAddEvent failed: Couldn't find DirectoryModel for '{0}'", directory_name);
				return;
			}

			if (is_directory)
				AddDirectory (dir, file_name);
			else
				AddFile (dir, file_name);
		}

		public void HandleRemoveEvent (string directory_name, string file_name, bool is_directory)
		{
			Logger.Log.Debug ("*** Remove '{0}' '{1}' {2}", directory_name, file_name, is_directory ? "(dir)" : "(file)");

			if (is_directory) {
				string path;
				path = Path.Combine (directory_name, file_name);

				DirectoryModel dir;
				dir = GetDirectoryModelByPath (path);
				if (dir == null) {
					Logger.Log.Warn ("HandleRemoveEvent failed: Couldn't find DirectoryModel for '{0}'", path);
					return;
				}

				dir.WatchHandle = null;
				RemoveDirectory (dir);
			} else {
				DirectoryModel dir;
				dir = GetDirectoryModelByPath (directory_name);
				if (dir == null) {
					Logger.Log.Warn ("HandleRemoveEvent failed: Couldn't find DirectoryModel for '{0}'", directory_name);
					return;
				}

				RemoveFile (dir, file_name);
			}
		}

		public void HandleMoveEvent (string old_directory_name, string old_file_name, string new_directory_name, string new_file_name, bool is_directory)
		{
			Logger.Log.Debug ("*** Move '{0}' '{1}' -> '{2}' '{3}' {4}", old_directory_name, old_file_name, new_directory_name, new_file_name, is_directory ? "(dir)" : "(file)");

			if (is_directory) {
				DirectoryModel dir, new_parent;
				dir = GetDirectoryModelByPath (Path.Combine (old_directory_name, old_file_name));
				new_parent = GetDirectoryModelByPath (new_directory_name);

				MoveDirectory (dir, new_parent, new_file_name);
				return;
			} else {
				DirectoryModel old_dir, new_dir;
				old_dir = GetDirectoryModelByPath (old_directory_name);
				new_dir = GetDirectoryModelByPath (new_directory_name);

				MoveFile (old_dir, old_file_name, new_dir, new_file_name);
			}
		}

		public void HandleOverflowEvent ()
		{
			Logger.Log.Debug ("Queue overflows suck");
		}

	}
}