1 #!/usr/bin/python2.4
2 #
3 # CDDL HEADER START
4 #
5 # The contents of this file are subject to the terms of the
6 # Common Development and Distribution License (the "License").
7 # You may not use this file except in compliance with the License.
8 #
9 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 # or http://www.opensolaris.org/os/licensing.
11 # See the License for the specific language governing permissions
12 # and limitations under the License.
13 #
14 # When distributing Covered Code, include this CDDL HEADER in each
15 # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 # If applicable, add the following below this CDDL HEADER, with the
17 # fields enclosed by brackets "[]" replaced with your own identifying
18 # information: Portions Copyright [yyyy] [name of copyright owner]
19 #
20 # CDDL HEADER END
21 #
22 # Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 # Use is subject to license terms.
24
25 import subprocess
26 import threading
27 import signal
28 import os
29 import sys
30 import cherrypy
31
32 import pkg.catalog as catalog
33 import pkg.fmri as fmri
34 import pkg.indexer as indexer
35 import pkg.server.query_engine as query_e
36
37 from pkg.misc import SERVER_DEFAULT_MEM_USE_KB
38 from pkg.misc import emsg
39
40 class ServerCatalog(catalog.Catalog):
41 """The catalog information which is only needed by the server."""
42
def __init__(self, cat_root, authority = None, pkg_root = None,
    read_only = False, index_root = None, repo_root = None,
    rebuild = True):
    """Initialize the server-side search state, then the base catalog.

    cat_root, authority, pkg_root, read_only and rebuild are passed
    straight through to catalog.Catalog.__init__.

    index_root is the directory handed to the indexer and to the
    query engine; repo_root is passed as the "-d" argument to the
    indexing subprocess started by refresh_index().
    """

    self.index_root = index_root
    self.repo_root = repo_root

    # The update_handle lock protects the update_handle variable.
    # This allows update_handle to be checked and acted on in a
    # consistent step, preventing the dropping of needed updates.
    # The check at the top of refresh_index should always be done
    # prior to deciding to spin off a process for indexing as it
    # prevents more than one indexing process being run at the same
    # time.
    self.searchdb_update_handle_lock = threading.Lock()

    # Always define query_engine: search() tests it for falsiness
    # in order to create it lazily, which would raise
    # AttributeError if the attribute had never been assigned.
    self.query_engine = None
    if self.index_root:
        self.query_engine = \
            query_e.ServerQueryEngine(self.index_root)

    if os.name == 'posix':
        try:
            # child_handler notices when the indexing subprocess
            # exits.
            signal.signal(signal.SIGCHLD, self.child_handler)
        except ValueError:
            # signal.signal() raises ValueError when called from
            # a thread other than the main thread.
            emsg("Tried to create signal handler in "
                "a thread other than the main thread")

    self.searchdb_update_handle = None
    self._search_available = False
    self.deferred_searchdb_updates = []
    self.deferred_searchdb_updates_lock = threading.Lock()

    self.refresh_again = False

    # NOTE(review): the attributes above are assigned before the
    # base-class initializer runs, presumably because it can call
    # back into build_catalog()/refresh_index() when rebuild is
    # true -- confirm against catalog.Catalog before reordering.
    catalog.Catalog.__init__(self, cat_root, authority, pkg_root,
        read_only, rebuild)

    if not self._search_available:
        self._check_search()
83
def whence(self, cmd):
    """Return a usable path for the executable named by 'cmd'.

    An absolute 'cmd' is returned unchanged.  Otherwise each
    directory listed in the PATH environment variable is searched in
    order, followed by the current working directory (taken from the
    PWD environment variable, or os.getcwd() when PWD is unset), and
    the first existing candidate is returned.

    Raises AssertionError if no candidate exists.
    """
    if os.path.isabs(cmd):
        return cmd

    # A missing PATH or PWD must not abort the lookup with a
    # KeyError; fall back to an empty search path and the real
    # working directory respectively.
    search_dirs = os.environ.get('PATH', '').split(os.pathsep)
    search_dirs.append(os.environ.get('PWD') or os.getcwd())

    # An empty name can never be located; skip the search so that
    # it is reported as "not found" instead of matching a directory.
    if cmd:
        for directory in search_dirs:
            candidate = os.path.join(directory, cmd)
            if os.path.exists(candidate):
                return candidate

    # Raised explicitly (rather than via 'assert') so the check is
    # still performed when Python runs with -O; the exception type
    # is unchanged for existing callers.
    raise AssertionError("unable to locate command: %r" % (cmd,))
96
def refresh_index(self):
    """Refresh the search indexes if there are any new packages.

    If an indexing subprocess is already recorded in
    searchdb_update_handle, only set refresh_again and return.
    Otherwise, when there are fmris left to index, start a
    subprocess (on posix) which results in a call to
    run_update_index, or call run_update_index directly on other
    platforms.  When nothing needs indexing, set up the index and
    declare search available.

    Any exception raised while starting the subprocess is re-raised
    after a message has been emitted.
    """

    self.searchdb_update_handle_lock.acquire()
    try:
        if self.searchdb_update_handle:
            # An indexing process is already in flight; record
            # that another pass is wanted once it finishes.
            self.refresh_again = True
            return

        fmris_to_index = set(self.fmris())

        # The return value is ignored and the set is re-tested
        # below, so check_for_updates presumably prunes the set
        # in place down to the fmris that still need indexing.
        indexer.Indexer.check_for_updates(self.index_root,
            fmris_to_index)

        if not fmris_to_index:
            # Since there is nothing to index, setup the index
            # and declare search available.  We only log this
            # if this represents a change in status of the
            # server.
            ind = indexer.Indexer(self.index_root,
                SERVER_DEFAULT_MEM_USE_KB)
            ind.setup()
            if not self._search_available:
                cherrypy.log("Search Available", "INDEX")
            self._search_available = True
            return

        if os.name != 'posix':
            # No subprocess is used here; index synchronously.
            self.run_update_index()
            return

        cmd = self.whence(sys.argv[0])
        args = (cmd, "--refresh-index", "-d", self.repo_root)
        try:
            self.searchdb_update_handle = subprocess.Popen(
                args, stderr = subprocess.STDOUT)
        except Exception:
            emsg("Starting the indexing process failed")
            raise
    finally:
        # Single release point for every exit path above.
        self.searchdb_update_handle_lock.release()
146
def run_update_index(self):
    """Work out which fmris still need indexing and index them.

    Note: Only one instance of this method should be running.
    External locking is expected to ensure this behavior.  Calling
    refresh_index is the preferred way to trigger a reindex.
    """
    pending = set(self.fmris())

    # The return value is unused and 'pending' is tested afterwards,
    # so check_for_updates presumably trims the set in place.
    indexer.Indexer.check_for_updates(self.index_root, pending)

    if not pending:
        # Nothing to index; just make sure the index is set up.
        idle_indexer = indexer.Indexer(self.index_root,
            SERVER_DEFAULT_MEM_USE_KB)
        idle_indexer.setup()
        return

    self.__update_searchdb_unlocked(pending)
166
def _check_search(self):
    """Declare search available, and log it, when the indexer
    reports that an index already exists under index_root."""
    probe = indexer.Indexer(self.index_root,
        SERVER_DEFAULT_MEM_USE_KB)
    if not probe.check_index_existence():
        return
    self._search_available = True
    cherrypy.log("Search Available", "INDEX")
173
def build_catalog(self):
    """Build the catalog and then refresh the search index.

    Search availability is checked first via _check_search(), the
    base-class catalog.Catalog.build_catalog() does the actual
    catalog build, and refresh_index() is called last to index
    whatever is not yet indexed.
    """
    self._check_search()
    catalog.Catalog.build_catalog(self)
    # refresh_index doesn't use file modification times
    # to determine which packages need to be indexed, so use
    # it to reindex if it's needed.
    self.refresh_index()
184
def child_handler(self, sig, frame):
    """Handler method for the SIGCHLD signal.

    Checks whether the search database update child has finished.
    On a zero exit status searching is enabled and, if a refresh
    was requested while the child ran, another refresh is started.
    On any other exit status searching is declared unavailable and
    an error is emitted.  In both cases the update handle is
    cleared so that later refresh_index() calls can start a new
    indexing process.

    'sig' and 'frame' are the standard signal handler arguments and
    are not used.
    """
    try:
        signal.signal(signal.SIGCHLD, self.child_handler)
    except ValueError:
        emsg("Tried to create signal handler in "
            "a thread other than the main thread")

    # If there's no update_handle, then another subprocess was
    # spun off and that was what finished.  If the poll() returns
    # None, then while the indexer was running, another process
    # that was spun off finished.
    handle = self.searchdb_update_handle
    if not handle:
        return
    rc = handle.poll()
    if rc is None:
        return

    if rc == 0:
        self._search_available = True
        cherrypy.log("Search indexes updated and available.",
            "INDEX")
    else:
        # XXX This should be logged instead
        # If the refresh of the index failed (non-zero exit, or a
        # negative code meaning the child was killed by a signal),
        # defensively declare that search is unavailable.
        self._search_available = False
        emsg(_("ERROR building search database, rc: %s") % rc)
        # refresh_index() starts the child with stderr=STDOUT and
        # no pipe, in which case Popen.stderr is None and there
        # is nothing to read here.
        if handle.stderr is not None:
            emsg(handle.stderr.read())

    # Need to acquire this lock to prevent the possibility of a
    # race condition with refresh_index where a needed refresh is
    # dropped.  The handle is cleared after failures as well;
    # otherwise refresh_index() would treat the dead child as still
    # running and never index again.
    # NOTE(review): this lock is not reentrant; if the signal can
    # be delivered while the same thread holds it inside
    # refresh_index(), this acquire would block -- confirm.
    self.searchdb_update_handle_lock.acquire()
    try:
        self.searchdb_update_handle = None
    finally:
        self.searchdb_update_handle_lock.release()

    # It is possible that an extra refresh will be done with this
    # code, but that refresh should be very quick to finish.
    # refresh_again is deliberately left untouched after a failure.
    if rc == 0 and self.refresh_again:
        self.refresh_again = False
        self.refresh_index()
229
def __update_searchdb_unlocked(self, fmri_list):
    """Pair every fmri in fmri_list with its manifest path and hand
    the resulting list to the indexer.  All needed locking is
    assumed to have been done by the caller.
    """
    assert self.index_root

    # Only the manifest path is passed along, not the manifest
    # itself: the indexer opens, processes and discards each
    # manifest (and its search dictionary), which is much more
    # memory efficient than holding them all here.
    work_items = [
        (pfmri, os.path.join(self.pkg_root, pfmri.get_dir_path()))
        for pfmri in fmri_list
    ]

    if not work_items:
        return

    updater = indexer.Indexer(self.index_root,
        SERVER_DEFAULT_MEM_USE_KB)
    updater.server_update_index(work_items)
254
def search(self, token):
    """Look 'token' up in the search database using a
    case-insensitive query and return the query engine's result
    (a list of token type / fmri pairs)."""
    assert self.index_root

    # Create the query engine on first use if there isn't one yet.
    if not self.query_engine:
        engine = query_e.ServerQueryEngine(self.index_root)
        self.query_engine = engine

    request = query_e.Query(token, case_sensitive=False)
    results = self.query_engine.search(request)
    return results
264
@staticmethod
def read_catalog(catalog, dir, auth=None):
    """Read the catalog file in "dir" and combine it with the
    existing data in "catalog".

    Only lines beginning with "V pkg" or "C pkg" are used; the text
    from the eighth character onwards is parsed as an fmri and
    cached into "catalog" together with "auth".

    Note that the "catalog" parameter shadows the module-level
    catalog import inside this function.

    Raises IOError if the catalog file cannot be opened.
    """

    catf = open(os.path.join(dir, "catalog"))
    try:
        for line in catf:
            # Skip every line that is not a package entry.
            if line[:5] not in ("V pkg", "C pkg"):
                continue

            f = fmri.PkgFmri(line[7:])
            ServerCatalog.cache_fmri(catalog, f, auth)
    finally:
        # Close the file even if parsing or caching raises.
        catf.close()
280