1 #!/usr/bin/python2.4
2 #
3 # CDDL HEADER START
4 #
5 # The contents of this file are subject to the terms of the
6 # Common Development and Distribution License (the "License").
7 # You may not use this file except in compliance with the License.
8 #
9 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 # or http://www.opensolaris.org/os/licensing.
11 # See the License for the specific language governing permissions
12 # and limitations under the License.
13 #
14 # When distributing Covered Code, include this CDDL HEADER in each
15 # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 # If applicable, add the following below this CDDL HEADER, with the
17 # fields enclosed by brackets "[]" replaced with your own identifying
18 # information: Portions Copyright [yyyy] [name of copyright owner]
19 #
20 # CDDL HEADER END
21 #
22 # Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 # Use is subject to license terms.
24
25 import subprocess
26 import threading
27 import signal
28 import os
29 import sys
30 import cherrypy
31
32 import pkg.catalog as catalog
33 import pkg.indexer as indexer
34 import pkg.server.query_engine as query_e
35
36 from pkg.misc import SERVER_DEFAULT_MEM_USE_KB
37 from pkg.misc import emsg
38
class ServerCatalog(catalog.Catalog):
    """The catalog information which is only needed by the server.

    On top of the base catalog this class keeps track of the search
    index: whether search is currently available, whether an indexing
    subprocess is running, and whether another refresh was requested
    while one was already in flight.
    """

    def __init__(self, cat_root, authority=None, pkg_root=None,
            read_only=False, index_root=None, repo_root=None,
            rebuild=True):
        """Set up the index bookkeeping, then initialize the base catalog.

        cat_root, authority, pkg_root, read_only and rebuild are passed
        straight through to catalog.Catalog.__init__.  index_root is the
        directory handed to the indexer and the query engine; repo_root
        is passed with "-d" to the indexing subprocess.
        """

        self.index_root = index_root
        self.repo_root = repo_root

        # The update_handle lock protects the update_handle variable.
        # This allows update_handle to be checked and acted on in a
        # consistent step, preventing the dropping of needed updates.
        # The check at the top of refresh index should always be done
        # prior to deciding to spin off a process for indexing as it
        # prevents more than one indexing process being run at the same
        # time.
        self.searchdb_update_handle_lock = threading.Lock()

        # Always define the attribute so that search() can test it
        # without risking an AttributeError.
        self.query_engine = None
        if self.index_root:
            self.query_engine = \
                query_e.ServerQueryEngine(self.index_root)

        if os.name == 'posix':
            try:
                signal.signal(signal.SIGCHLD, self.child_handler)
            except ValueError:
                emsg("Tried to create signal handler in "
                    "a thread other than the main thread")

        self.searchdb_update_handle = None
        self._search_available = False
        self.deferred_searchdb_updates = []
        self.deferred_searchdb_updates_lock = threading.Lock()

        self.refresh_again = False

        # NOTE(review): the state above is set before the base
        # initializer runs, presumably because it can call back into
        # the overridden build_catalog() -- confirm against
        # catalog.Catalog.
        catalog.Catalog.__init__(self, cat_root, authority, pkg_root,
            read_only, rebuild)

        if not self._search_available:
            self._check_search()

    def whence(self, cmd):
        """Resolve 'cmd' to a path of an existing file.

        A cmd starting with '/' is returned unchanged.  Otherwise each
        directory in $PATH, followed by the current working directory,
        is tried and the first existing candidate is returned.

        Raises AssertionError if no candidate exists.
        """
        if cmd[0] != '/':
            tmp_cmd = cmd
            cmd = None
            path = os.environ.get('PATH', '').split(':')
            # $PWD is not guaranteed to be set (e.g. when started from
            # a service manager), so fall back to asking the OS.
            path.append(os.environ.get('PWD') or os.getcwd())
            for p in path:
                candidate = os.path.join(p, tmp_cmd)
                if os.path.exists(candidate):
                    cmd = candidate
                    break
            # Raised explicitly (rather than via the assert statement)
            # so the check is not stripped when running with -O.
            if not cmd:
                raise AssertionError(
                    "unable to locate %s in PATH or the current "
                    "directory" % tmp_cmd)
        return cmd

    def refresh_index(self):
        """Refresh the search indexes if there are any new packages.

        On posix this starts a subprocess (this program re-invoked with
        "--refresh-index") which results in a call to run_update_index
        (see below) which does the actual update; elsewhere
        run_update_index is called directly.  If an indexing subprocess
        is already recorded, only refresh_again is set so that
        child_handler triggers another refresh once it has finished.
        """

        self.searchdb_update_handle_lock.acquire()
        try:
            if self.searchdb_update_handle:
                self.refresh_again = True
                return

            fmris_to_index = set(self.fmris())

            # NOTE(review): presumably this prunes fmris_to_index in
            # place down to the packages still needing indexing --
            # confirm against indexer.Indexer.check_for_updates.
            indexer.Indexer.check_for_updates(self.index_root,
                fmris_to_index)

            if fmris_to_index:
                if os.name == 'posix':
                    cmd = self.whence(sys.argv[0])
                    args = (cmd, "--refresh-index", "-d",
                        self.repo_root)
                    try:
                        self.searchdb_update_handle = \
                            subprocess.Popen(args,
                                stderr=subprocess.STDOUT)
                    except Exception:
                        emsg("Starting the indexing "
                            "process failed")
                        raise
                else:
                    self.run_update_index()
            else:
                # Since there is nothing to index, setup
                # the index and declare search available.
                # We only log this if this represents
                # a change in status of the server.
                ind = indexer.Indexer(self.index_root,
                    SERVER_DEFAULT_MEM_USE_KB)
                ind.setup()
                if not self._search_available:
                    cherrypy.log("Search Available", "INDEX")
                self._search_available = True
        finally:
            # Single release point covering the early return, the
            # normal paths and any exception.
            self.searchdb_update_handle_lock.release()

    def run_update_index(self):
        """Determine which fmris need to be indexed and pass them
        to the indexer.

        Note: Only one instance of this method should be running.
        External locking is expected to ensure this behavior.  Calling
        refresh_index is the preferred method to use to reindex.
        """
        fmris_to_index = set(self.fmris())

        indexer.Indexer.check_for_updates(self.index_root,
            fmris_to_index)

        if fmris_to_index:
            self.__update_searchdb_unlocked(fmris_to_index)
        else:
            # Nothing to index: just make sure the index is set up.
            ind = indexer.Indexer(self.index_root,
                SERVER_DEFAULT_MEM_USE_KB)
            ind.setup()

    def _check_search(self):
        """Mark search as available (and log it) if the indexer
        reports that an index already exists under index_root."""
        ind = indexer.Indexer(self.index_root,
            SERVER_DEFAULT_MEM_USE_KB)
        if ind.check_index_existence():
            self._search_available = True
            cherrypy.log("Search Available", "INDEX")

    def build_catalog(self):
        """Check for an existing index, build the catalog via the
        base class, then refresh the index."""
        self._check_search()
        catalog.Catalog.build_catalog(self)
        # refresh_index doesn't use file modification times
        # to determine which packages need to be indexed, so use
        # it to reindex if it's needed.
        self.refresh_index()

    def child_handler(self, sig, frame):
        """Handler method for the SIGCHLD signal.

        Checks to see if the search database update child has
        finished, and enables searching if it finished successfully,
        or reports an error if it didn't.  In both cases the recorded
        handle is cleared so that a later refresh_index call can start
        a new indexing process.

        sig and frame are the standard signal handler arguments and
        are not used.
        """
        try:
            signal.signal(signal.SIGCHLD, self.child_handler)
        except ValueError:
            emsg("Tried to create signal handler in "
                "a thread other than the main thread")

        # If there's no update_handle, then another subprocess was
        # spun off and that was what finished.  If the poll() returns
        # None, then while the indexer was running, another process
        # that was spun off finished.
        handle = self.searchdb_update_handle
        if not handle:
            return
        rc = handle.poll()
        if rc is None:
            return

        if rc == 0:
            self._search_available = True
            cherrypy.log("Search indexes updated and available.",
                "INDEX")
            # Need to acquire this lock to prevent the possibility
            # of a race condition with refresh_index where a needed
            # refresh is dropped.  It is possible that an extra
            # refresh will be done with this code, but that refresh
            # should be very quick to finish.
            self.searchdb_update_handle_lock.acquire()
            try:
                self.searchdb_update_handle = None
            finally:
                self.searchdb_update_handle_lock.release()

            if self.refresh_again:
                self.refresh_again = False
                self.refresh_index()
        else:
            # Any non-zero code is a failure; a negative value means
            # the child was terminated by a signal.
            # If the refresh of the index failed, defensively
            # declare that search is unavailable.
            self._search_available = False

            # Forget the finished child first; otherwise every later
            # refresh_index call would see a stale handle and never
            # start a new indexing process.
            self.searchdb_update_handle_lock.acquire()
            try:
                self.searchdb_update_handle = None
            finally:
                self.searchdb_update_handle_lock.release()

            # XXX This should be logged instead
            emsg(_("ERROR building search database, rc: %s") % rc)
            # The child is started with stderr redirected to its
            # stdout and no pipe, so there is normally no stream to
            # read here; only read one if it actually exists.
            err_stream = handle.stderr
            if err_stream is not None:
                emsg(err_stream.read())

    def __update_searchdb_unlocked(self, fmri_list):
        """Take a fmri_list and call the indexer with a list of fmri
        and manifest file path pairs.  It assumes that all needed
        locking has already occurred.
        """
        assert self.index_root

        # Rather than storing the manifests, simply pass along the
        # file and have the indexer take care of opening and
        # reading the manifest file.  Since the indexer
        # processes and discards the manifest structure (and its
        # search dictionary for that matter) this
        # is much more memory efficient.
        fmri_manifest_list = [
            (f, os.path.join(self.pkg_root, f.get_dir_path()))
            for f in fmri_list
        ]

        if fmri_manifest_list:
            index_inst = indexer.Indexer(self.index_root,
                SERVER_DEFAULT_MEM_USE_KB)
            index_inst.server_update_index(fmri_manifest_list)

    def search(self, token):
        """Search through the search database for 'token'.  Return
        the result of the query engine's case-insensitive search,
        described by the original author as a list of token type /
        fmri pairs."""
        assert self.index_root
        # Created lazily in case it was not built by __init__.
        if not self.query_engine:
            self.query_engine = \
                query_e.ServerQueryEngine(self.index_root)
        query = query_e.Query(token, case_sensitive=False)
        return self.query_engine.search(query)