Print this page
3166 feed generation needs performance improvement
3306 feed returns invalid last-modified header
Split |
Close |
Expand all |
Collapse all |
--- old/src/modules/server/feed.py
+++ new/src/modules/server/feed.py
1 1 #!/usr/bin/python2.4
2 2 #
3 3 # CDDL HEADER START
4 4 #
5 5 # The contents of this file are subject to the terms of the
6 6 # Common Development and Distribution License (the "License").
7 7 # You may not use this file except in compliance with the License.
8 8 #
9 9 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 10 # or http://www.opensolaris.org/os/licensing.
11 11 # See the License for the specific language governing permissions
12 12 # and limitations under the License.
13 13 #
14 14 # When distributing Covered Code, include this CDDL HEADER in each
15 15 # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 16 # If applicable, add the following below this CDDL HEADER, with the
17 17 # fields enclosed by brackets "[]" replaced with your own identifying
18 18 # information: Portions Copyright [yyyy] [name of copyright owner]
19 19 #
20 20 # CDDL HEADER END
21 21 #
22 22 # Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 23 # Use is subject to license terms.
24 24
25 25 """feed - routines for generating RFC 4287 Atom feeds for packaging server
26 26
27 27 At present, the pkg.server.feed module provides a set of routines that, from
↓ open down ↓ |
27 lines elided |
↑ open up ↑ |
28 28 a catalog, allow the construction of a feed representing the activity within
29 29 a given time period."""
30 30
31 31 import cherrypy
32 32 from cherrypy.lib.static import serve_file
33 33 import cStringIO
34 34 import datetime
35 35 import httplib
36 36 import os
37 37 import rfc822
38 +import sys
38 39 import time
39 40 import urllib
40 41 import xml.dom.minidom as xmini
41 42
42 43 from pkg.misc import get_rel_path, get_res_path
43 -import pkg.catalog as catalog
44 +import pkg.server.catalog as catalog
44 45 import pkg.fmri as fmri
45 46 import pkg.Uuid25 as uuid
46 47
# Content type reported for the generated Atom document.
MIME_TYPE = "application/atom+xml"
# Name of the cached feed file.
CACHE_FILENAME = "feed.xml"
# strftime()/strptime() pattern used for RFC 3339 timestamps in the feed.
RFC3339_FMT = "%Y-%m-%dT%H:%M:%SZ"
50 51
def dt_to_rfc3339_str(ts):
    """Format the datetime object 'ts' using RFC3339_FMT and return the
    resulting string.  No timezone conversion is performed here; 'ts' is
    formatted exactly as given.
    """
    formatted = ts.strftime(RFC3339_FMT)
    return formatted
56 57
def rfc3339_str_to_ts(ts_str):
    """Parse 'ts_str', which must match RFC3339_FMT, and return the
    corresponding timestamp as computed by time.mktime().
    """
    parsed = time.strptime(ts_str, RFC3339_FMT)
    return time.mktime(parsed)
62 63
def rfc3339_str_to_dt(ts_str):
    """Parse 'ts_str', which must match RFC3339_FMT, and return a
    datetime object built from its year through second fields.
    """
    year, month, day, hour, minute, second = \
        time.strptime(ts_str, RFC3339_FMT)[0:6]
    return datetime.datetime(year, month, day, hour, minute, second)
68 69
def ults_to_ts(ts_str):
    """Convert 'ts_str', a timestamp in updatelog format
    (YYYY-MM-DDTHH:MM:SS with an optional fractional part), into a
    timestamp as computed by time.mktime().
    """
    # strptime() cannot parse fractional seconds, so drop everything from
    # the first '.' onwards.
    whole_seconds = ts_str.split('.', 1)[0]
    # Currently, updatelog entries are in local time, not UTC, which is
    # what mktime() expects.
    parsed = time.strptime(whole_seconds, "%Y-%m-%dT%H:%M:%S")
    return time.mktime(parsed)
77 78
def ults_to_rfc3339_str(ts_str):
    """Returns an RFC 3339 formatted string representing 'ts_str', which
    should be in updatelog format.
    """
    epoch_secs = ults_to_ts(ts_str)
    # Currently, updatelog entries are in local time, not UTC, so go
    # through gmtime() to obtain the UTC fields before formatting.
    utc_fields = time.gmtime(epoch_secs)[0:6]
    return dt_to_rfc3339_str(datetime.datetime(*utc_fields))
86 87
def fmri_to_taguri(rcfg, f):
    """Build a 'tag' URI (RFC 4151, see http://www.taguri.org/) for the
    FMRI 'f'.

    The URI is assembled from the feed authority configured in 'rcfg',
    the date portion of the FMRI's timestamp, and the FMRI's unquoted
    URL path.
    """
    authority = rcfg.get_attribute("feed", "authority")
    day = f.get_timestamp().strftime("%Y-%m-%d")
    path = urllib.unquote(f.get_url_path())
    return "tag:%s,%s:%s" % (authority, day, path)
94 95
def init(scfg, rcfg):
    """Perform the one-time setup feeds need: make sure a feed id exists
    and discard any previously cached feed.
    """

    if scfg.is_read_only():
        # Nothing can be persisted, and __clear_cache() ignores requests
        # in read-only mode anyway, so there is nothing to do.
        return

    # RSS/Atom feeds require a unique identifier that must persist, so
    # one is only generated here, where the configuration can be saved.
    if not rcfg.get_attribute("feed", "id"):
        # Create a random UUID (type 4).
        rcfg._set_attribute("feed", "id", uuid.uuid4())

    # Ensure any configuration changes are reflected in the feed.
    __clear_cache(scfg)
112 113
def set_title(request, rcfg, doc, feed, update_ts):
    """Attach the feed-level Atom elements to 'feed': title, self link,
    id, updated time, icon, logo and, when a repository maintainer is
    configured, author.

    'doc' is the xmini document used to create elements, 'feed' is the
    element they are appended to, and 'update_ts' is the datetime
    written to the "updated" element.
    """

    def add_text_child(parent, tag_name, text):
        # Append <tag_name>text</tag_name> to 'parent'.
        node = doc.createElement(tag_name)
        body = xmini.Text()
        body.replaceWholeText(text)
        node.appendChild(body)
        parent.appendChild(node)

    add_text_child(feed, "title", rcfg.get_attribute("feed", "name"))

    self_link = doc.createElement("link")
    self_link.setAttribute("href", cherrypy.url())
    self_link.setAttribute("rel", "self")
    feed.appendChild(self_link)

    # Atom requires each feed to have a permanent, universally unique
    # identifier.
    add_text_child(feed, "id",
        "urn:uuid:%s" % rcfg.get_attribute("feed", "id"))

    # Indicate when the feed was last updated.
    add_text_child(feed, "updated", dt_to_rfc3339_str(update_ts))

    # Add our icon and logo.
    add_text_child(feed, "icon",
        get_res_path(request, rcfg.get_attribute("feed", "icon")))
    add_text_child(feed, "logo",
        get_res_path(request, rcfg.get_attribute("feed", "logo")))

    # The author information isn't required, but can be useful.
    maintainer = rcfg.get_attribute("repository", "maintainer")
    if not maintainer:
        return

    name, email = rfc822.AddressList(maintainer).addresslist[0]
    if email and not name:
        # An address without a name most likely means the name itself
        # was parsed as a local address; treat the whole string as the
        # name instead.
        name = maintainer
        email = None

    author = doc.createElement("author")

    # A name element is required whenever an author element exists.
    add_text_child(author, "name", name)

    if email:
        # The optional email element provides a point of communication.
        add_text_child(author, "email", email)

    # Done with the author.
    feed.appendChild(author)
196 197
# Maps an updatelog operation code to [entry title prefix, entry content
# template]; the template takes the FMRI string as its only argument.
operations = {
    "+": [
        "Added",
        "%s was added to the repository.",
    ],
    "-": [
        "Removed",
        "%s was removed from the repository.",
    ],
    "U": [
        "Updated",
        "%s, an update to an existing package, was added to "
        "the repository.",
    ],
}
203 204
def add_transaction(request, scfg, rcfg, doc, feed, txn, fmris):
    """Append one Atom "entry" element describing the updatelog
    transaction 'txn' to 'feed'.  The entry carries non-trivial content,
    so no summary element is emitted.

    'txn' is read through its "catalog" (tag and FMRI string),
    "operation" and "timestamp" keys.  'fmris' is indexed by package
    name; each value holds a "versions" list plus, keyed by version
    string, a list of FMRIs.
    """

    entry = doc.createElement("entry")

    tag, fmri_str = txn["catalog"].split()
    pfmri = fmri.PkgFmri(fmri_str)

    # Generate a 'tag' uri, to uniquely identify the entry, using the fmri.
    id_text = xmini.Text()
    id_text.replaceWholeText(fmri_to_taguri(rcfg, pfmri))
    entry_id = doc.createElement("id")
    entry_id.appendChild(id_text)
    entry.appendChild(entry_id)

    # Determine the operation that was performed and pick the matching
    # entry title and content.
    # XXX Better way to reflect an error? (Aborting will make a
    # non-well-formed document.)
    operation = txn["operation"]
    op_title, op_content = operations.get(operation,
        ["Unknown Operation", "%s was changed in the repository."])

    if operation == "+":
        # Get all FMRIs matching the current FMRI's package name.
        matches = fmris[pfmri.pkg_name]
        if len(matches["versions"]) > 1:
            # Get the oldest fmri.
            oldest = matches[str(matches["versions"][0])][0]

            # More than one version exists and this one isn't the
            # oldest, so this is an update to an existing package.
            if pfmri != oldest:
                op_title, op_content = operations["U"]

    # Now add a title for our entry.
    title_text = xmini.Text()
    title_text.replaceWholeText(" ".join([op_title, fmri_str]))
    entry_title = doc.createElement("title")
    entry_title.appendChild(title_text)
    entry.appendChild(entry_title)

    # Indicate when the entry was last updated (in this case, when the
    # package was added).
    updated_text = xmini.Text()
    updated_text.replaceWholeText(ults_to_rfc3339_str(txn["timestamp"]))
    entry_updated = doc.createElement("updated")
    entry_updated.appendChild(updated_text)
    entry.appendChild(entry_updated)

    # Link to the info output for the given package FMRI.
    link = doc.createElement("link")
    link.setAttribute("rel", "alternate")
    link.setAttribute("href",
        get_rel_path(request, 'info/0/%s' % pfmri.get_url_path()))
    entry.appendChild(link)

    # Using the description for the operation performed, add the FMRI and
    # tag information.
    content_text = op_content % fmri_str
    if tag == "C":
        content_text += " This version is tagged as critical."

    content_node = xmini.Text()
    content_node.replaceWholeText(content_text)
    entry_content = doc.createElement("content")
    entry_content.appendChild(content_node)
    entry.appendChild(entry_content)

    feed.appendChild(entry)
286 284
def update(request, scfg, rcfg, t, cf):
    """Generate a new Atom document for the current updates and write it
    to the file-like object 'cf'.

    't' is a timestamp; updatelog entries are requested starting from
    't' minus the configured feed window.  Entries are emitted in
    reverse chronological order.
    """

    # Our configuration is stored in hours, convert it to seconds.
    window_seconds = rcfg.get_attribute("feed", "window") * 60 * 60
    feed_ts = datetime.datetime.fromtimestamp(t - window_seconds)

    d = xmini.Document()

    feed = d.createElementNS("http://www.w3.org/2005/Atom", "feed")
    feed.setAttribute("xmlns", "http://www.w3.org/2005/Atom")

    set_title(request, rcfg, d, feed, scfg.updatelog.last_update)

    d.appendChild(feed)

    # Sort key for updatelog entries.  A key function parses each
    # timestamp once, where a cmp function would re-parse both operands
    # on every comparison; the resulting order is the same.
    def entry_ts(txn):
        return ults_to_ts(txn["timestamp"])

    # Get the entire catalog in the format returned by catalog.cache_fmri,
    # so that we don't have to keep looking for possible matches.
    fmris = {}
    catalog.ServerCatalog.read_catalog(fmris,
        scfg.updatelog.catalog.catalog_root)

    # The feed should be presented in reverse chronological order.
    for txn in sorted(scfg.updatelog.gen_updates_as_dictionaries(feed_ts),
        key=entry_ts, reverse=True):
        add_transaction(request, scfg, rcfg, d, feed, txn, fmris)

    d.writexml(cf)
315 319
def __get_cache_pathname(scfg):
    """Return the path of the cached feed file: CACHE_FILENAME inside
    scfg.repo_root.
    """
    cache_dir = scfg.repo_root
    return os.path.join(cache_dir, CACHE_FILENAME)
318 322
def __clear_cache(scfg):
    """Remove the cached feed file, if there is one.

    Does nothing when the server is read-only.  Raises
    cherrypy.HTTPError (INTERNAL_SERVER_ERROR) if the file cannot be
    removed.
    """
    if scfg.is_read_only():
        # Ignore the request due to server configuration.
        return

    pathname = __get_cache_pathname(scfg)
    try:
        if os.path.exists(pathname):
            os.remove(pathname)
    except EnvironmentError:
        # os.remove() reports failure with OSError, which IOError does
        # not cover; EnvironmentError is the common base of both.
        raise cherrypy.HTTPError(
            httplib.INTERNAL_SERVER_ERROR,
            "Unable to clear feed cache.")
332 336
def __cache_needs_update(scfg):
    """Decide whether the cached feed file must be regenerated.

    Returns a (need_update, last) tuple.  need_update is False only when
    a cache file exists, parses, and its "updated" time is not older
    than the updatelog's last update.  last is the timestamp taken from
    the cached feed's "updated" element when the cache is stale, and
    None in every other case.  A cache file that cannot be parsed, or
    that lacks usable content, is cleared.
    """
    cfpath = __get_cache_pathname(scfg)
    last = None
    need_update = True

    if not os.path.isfile(cfpath):
        return need_update, last

    # Attempt to parse the cached copy.  If we can't, for any reason,
    # assume we need to remove it and start over.
    try:
        cached_doc = xmini.parse(cfpath)
    except Exception:
        cached_doc = None
        __clear_cache(scfg)

    # The first child of the document is taken to be the feed element.
    feed_elem = None
    if cached_doc:
        feed_elem = cached_doc.childNodes[0]

    if not feed_elem:
        # Without a feed element the cache is unusable; clear it and
        # force regeneration.
        __clear_cache(scfg)
        return need_update, last

    # Look for the text node holding the time the feed was generated.
    updated_text = None
    for child in feed_elem.childNodes:
        if child.nodeName == "updated":
            updated_text = child.childNodes[0]
            break

    if not updated_text:
        __clear_cache(scfg)
        return need_update, last

    last_ts = rfc3339_str_to_dt(updated_text.nodeValue)

    # Since our feed cache and updatelog might have been created within
    # the same second, we need to ignore small variances when
    # determining whether to update the feed cache.
    update_ts = scfg.updatelog.last_update.replace(microsecond=0)

    if last_ts >= update_ts:
        need_update = False
    else:
        last = rfc3339_str_to_ts(updated_text.nodeValue)

    return need_update, last
386 390
def handle(scfg, rcfg, request, response):
    """If there have been package updates since we last generated the feed,
    update the feed and send it to the client.  Otherwise, send them the
    cached copy if it is available.

    In read-only mode the generated feed is returned directly as a
    string, with the response headers set here; otherwise the feed is
    written to the cache file and the result of serve_file() is returned.
    """

    cfpath = __get_cache_pathname(scfg)

    # First check to see if we already have a valid cache of the feed.
    need_update, last = __cache_needs_update(scfg)

    if need_update:
        # Update always looks at feed.window seconds before the last
        # update until "now."  If last is none, we want it to use "now"
        # as its starting point.
        if last is None:
            last = time.time()

        if scfg.is_read_only():
            # If the server is operating in readonly mode, the
            # feed will have to be generated every time.
            cf = cStringIO.StringIO()
            try:
                update(request, scfg, rcfg, last, cf)
                buf = cf.getvalue()
            finally:
                # Release the buffer even if generation fails.
                cf.close()

            # Now that the feed has been generated, set the headers
            # correctly and return it.
            response.headers['Content-type'] = MIME_TYPE

            # Return the current time and date in GMT.
            response.headers['Last-Modified'] = rfc822.formatdate()

            response.headers['Content-length'] = len(buf)
            return buf

        # If the server isn't operating in readonly mode, the feed can
        # be generated and cached in repo_root.
        cf = open(cfpath, "w")
        try:
            update(request, scfg, rcfg, last, cf)
        finally:
            # Always close the cache file so a failed update() does not
            # leak the handle.
            cf.close()

    return serve_file(cfpath, MIME_TYPE)
429 435
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX