18 # information: Portions Copyright [yyyy] [name of copyright owner]
19 #
20 # CDDL HEADER END
21 #
22 # Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 # Use is subject to license terms.
24
25 """feed - routines for generating RFC 4287 Atom feeds for packaging server
26
27 At present, the pkg.server.feed module provides a set of routines that, from
28 a catalog, allow the construction of a feed representing the activity within
29 a given time period."""
30
31 import cherrypy
32 from cherrypy.lib.static import serve_file
33 import cStringIO
34 import datetime
35 import httplib
36 import os
37 import rfc822
38 import time
39 import urllib
40 import xml.dom.minidom as xmini
41
42 from pkg.misc import get_rel_path, get_res_path
43 import pkg.catalog as catalog
44 import pkg.fmri as fmri
45 import pkg.Uuid25 as uuid
46
# Media type sent in the Content-type header for the generated feed.
MIME_TYPE = "application/atom+xml"

# Base name of the on-disk file that caches the generated feed.
CACHE_FILENAME = "feed.xml"

# strftime()/strptime() pattern used for the feed's RFC 3339 timestamps.
RFC3339_FMT = "%Y-%m-%dT%H:%M:%SZ"
50
def dt_to_rfc3339_str(ts):
    """Format the datetime object 'ts' as an RFC 3339 timestamp string.

    The value is rendered with RFC3339_FMT, which always ends in a
    literal "Z"; no timezone conversion is performed here.
    """
    formatted = ts.strftime(RFC3339_FMT)
    return formatted
56
def rfc3339_str_to_ts(ts_str):
    """Convert the RFC 3339 formatted string 'ts_str' into a timestamp
    (seconds since the epoch, as a float).

    'ts_str' must match RFC3339_FMT exactly; time.strptime() raises
    ValueError otherwise.
    """
    parsed = time.strptime(ts_str, RFC3339_FMT)
    # NOTE(review): time.mktime() interprets the parsed fields as local
    # time even though the format carries a "Z" suffix -- confirm that
    # callers expect local-time semantics.
    return time.mktime(parsed)
62
63 def rfc3339_str_to_dt(ts_str):
184 if email:
185 # If we were able to extract an email address from the
186 # maintainer information, add the optional email
187 # element to provide a point of communication.
188 e = doc.createElement("email")
189 et = xmini.Text()
190 et.replaceWholeText(email)
191 e.appendChild(et)
192 a.appendChild(e)
193
194 # Done with the author.
195 feed.appendChild(a)
196
# Maps an update log operation code to a two-item list: the word used in
# the entry title, and a '%s' template for the entry content.
operations = {
    "+": [
        "Added",
        "%s was added to the repository.",
    ],
    "-": [
        "Removed",
        "%s was removed from the repository.",
    ],
    "U": [
        "Updated",
        "%s, an update to an existing package, "
        "was added to the repository.",
    ],
}
203
204 def add_transaction(request, scfg, rcfg, doc, feed, txn):
205 """Each transaction is an entry. We have non-trivial content, so we
206 can omit summary elements.
207 """
208
209 e = doc.createElement("entry")
210
211 tag, fmri_str = txn["catalog"].split()
212 f = fmri.PkgFmri(fmri_str)
213
214 # Generate a 'tag' uri, to uniquely identify the entry, using the fmri.
215 i = xmini.Text()
216 i.replaceWholeText(fmri_to_taguri(rcfg, f))
217 eid = doc.createElement("id")
218 eid.appendChild(i)
219 e.appendChild(eid)
220
221 # Attempt to determine the operation that was performed and generate
222 # the entry title and content.
223 if txn["operation"] in operations:
224 op_title, op_content = operations[txn["operation"]]
225 else:
226 # XXX Better way to reflect an error? (Aborting will make a
227 # non-well-formed document.)
228 op_title = "Unknown Operation"
229 op_content = "%s was changed in the repository."
230
231 if txn["operation"] == "+":
232 c = scfg.updatelog.catalog
233 # Get all FMRIs matching the current FMRI's package name.
234 matches = catalog.extract_matching_fmris(c.fmris(),
235 f.get_name(), matcher=fmri.exact_name_match)
236
237 if len(matches) > 1:
238 # Get the oldest fmri (it's the last entry).
239 of = matches[-1]
240
241 # If the current fmri isn't the oldest one, then this
242 # is an update to the package.
243 if f != of:
244 # If there is more than one matching FMRI, and
245 # it isn't the same version as the oldest one,
246 # we can assume that this is an update to an
247 # existing package.
248 op_title, op_content = operations["U"]
249
250 # Now add a title for our entry.
251 etitle = doc.createElement("title")
252 ti = xmini.Text()
253 ti.replaceWholeText(" ".join([op_title, fmri_str]))
254 etitle.appendChild(ti)
255 e.appendChild(etitle)
256
257 # Indicate when the entry was last updated (in this case, when the
258 # package was added).
259 eu = doc.createElement("updated")
290 """
291
292 # Our configuration is stored in hours, convert it to seconds.
293 window_seconds = rcfg.get_attribute("feed", "window") * 60 * 60
294 feed_ts = datetime.datetime.fromtimestamp(t - window_seconds)
295
296 d = xmini.Document()
297
298 feed = d.createElementNS("http://www.w3.org/2005/Atom", "feed")
299 feed.setAttribute("xmlns", "http://www.w3.org/2005/Atom")
300
301 set_title(request, rcfg, d, feed, scfg.updatelog.last_update)
302
303 d.appendChild(feed)
304
305 # The feed should be presented in reverse chronological order.
306 def compare_ul_entries(a, b):
307 return cmp(ults_to_ts(a["timestamp"]),
308 ults_to_ts(b["timestamp"]))
309
310 for txn in sorted(scfg.updatelog.gen_updates_as_dictionaries(feed_ts),
311 cmp=compare_ul_entries, reverse=True):
312 add_transaction(request, scfg, rcfg, d, feed, txn)
313
314 d.writexml(cf)
315
def __get_cache_pathname(scfg):
    """Return the path of the feed cache file: CACHE_FILENAME joined
    onto the repository root taken from 'scfg'.
    """
    cache_dir = scfg.repo_root
    return os.path.join(cache_dir, CACHE_FILENAME)
318
def __clear_cache(scfg):
    """Remove the cached feed file, if one exists.

    Nothing is done when 'scfg' reports that the server is read-only.

    Raises cherrypy.HTTPError (INTERNAL_SERVER_ERROR) if the cache file
    exists but cannot be removed.
    """
    if scfg.is_read_only():
        # Ignore the request due to server configuration.
        return

    pathname = __get_cache_pathname(scfg)
    try:
        if os.path.exists(pathname):
            os.remove(pathname)
    except (IOError, OSError):
        # os.remove() signals failure with OSError, which is not a
        # subclass of IOError in Python 2; catching only IOError let
        # the failure escape without the intended HTTP error.
        raise cherrypy.HTTPError(
            httplib.INTERNAL_SERVER_ERROR,
            "Unable to clear feed cache.")
332
397
398 if need_update:
399 # Update always looks at feed.window seconds before the last
400 # update until "now." If last is none, we want it to use "now"
401 # as its starting point.
402 if last is None:
403 last = time.time()
404
405 if scfg.is_read_only():
406 # If the server is operating in readonly mode, the
407 # feed will have to be generated every time.
408 cf = cStringIO.StringIO()
409 update(request, scfg, rcfg, last, cf)
410 cf.seek(0)
411 buf = cf.read()
412 cf.close()
413
414 # Now that the feed has been generated, set the headers
415 # correctly and return it.
416 response.headers['Content-type'] = MIME_TYPE
417 response.headers['Last-Modified'] = \
418 datetime.datetime.now().isoformat()
419 response.headers['Content-length'] = len(buf)
420 return buf
421 else:
422 # If the server isn't operating in readonly mode, the
423 # feed can be generated and cached in inst_dir.
424 cf = file(cfpath, "w")
425 update(request, scfg, rcfg, last, cf)
426 cf.close()
427
428 return serve_file(cfpath, MIME_TYPE)
429
|
18 # information: Portions Copyright [yyyy] [name of copyright owner]
19 #
20 # CDDL HEADER END
21 #
22 # Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 # Use is subject to license terms.
24
25 """feed - routines for generating RFC 4287 Atom feeds for packaging server
26
27 At present, the pkg.server.feed module provides a set of routines that, from
28 a catalog, allow the construction of a feed representing the activity within
29 a given time period."""
30
31 import cherrypy
32 from cherrypy.lib.static import serve_file
33 import cStringIO
34 import datetime
35 import httplib
36 import os
37 import rfc822
38 import sys
39 import time
40 import urllib
41 import xml.dom.minidom as xmini
42
43 from pkg.misc import get_rel_path, get_res_path
44 import pkg.server.catalog as catalog
45 import pkg.fmri as fmri
46 import pkg.Uuid25 as uuid
47
# Media type sent in the Content-type header for the generated feed.
MIME_TYPE = "application/atom+xml"

# Base name of the on-disk file that caches the generated feed.
CACHE_FILENAME = "feed.xml"

# strftime()/strptime() pattern used for the feed's RFC 3339 timestamps.
RFC3339_FMT = "%Y-%m-%dT%H:%M:%SZ"
51
def dt_to_rfc3339_str(ts):
    """Format the datetime object 'ts' as an RFC 3339 timestamp string.

    The value is rendered with RFC3339_FMT, which always ends in a
    literal "Z"; no timezone conversion is performed here.
    """
    formatted = ts.strftime(RFC3339_FMT)
    return formatted
57
def rfc3339_str_to_ts(ts_str):
    """Convert the RFC 3339 formatted string 'ts_str' into a timestamp
    (seconds since the epoch, as a float).

    'ts_str' must match RFC3339_FMT exactly; time.strptime() raises
    ValueError otherwise.
    """
    parsed = time.strptime(ts_str, RFC3339_FMT)
    # NOTE(review): time.mktime() interprets the parsed fields as local
    # time even though the format carries a "Z" suffix -- confirm that
    # callers expect local-time semantics.
    return time.mktime(parsed)
63
64 def rfc3339_str_to_dt(ts_str):
185 if email:
186 # If we were able to extract an email address from the
187 # maintainer information, add the optional email
188 # element to provide a point of communication.
189 e = doc.createElement("email")
190 et = xmini.Text()
191 et.replaceWholeText(email)
192 e.appendChild(et)
193 a.appendChild(e)
194
195 # Done with the author.
196 feed.appendChild(a)
197
# Maps an update log operation code to a two-item list: the word used in
# the entry title, and a '%s' template for the entry content.
operations = {
    "+": [
        "Added",
        "%s was added to the repository.",
    ],
    "-": [
        "Removed",
        "%s was removed from the repository.",
    ],
    "U": [
        "Updated",
        "%s, an update to an existing package, "
        "was added to the repository.",
    ],
}
204
205 def add_transaction(request, scfg, rcfg, doc, feed, txn, fmris):
206 """Each transaction is an entry. We have non-trivial content, so we
207 can omit summary elements.
208 """
209
210 e = doc.createElement("entry")
211
212 tag, fmri_str = txn["catalog"].split()
213 f = fmri.PkgFmri(fmri_str)
214
215 # Generate a 'tag' uri, to uniquely identify the entry, using the fmri.
216 i = xmini.Text()
217 i.replaceWholeText(fmri_to_taguri(rcfg, f))
218 eid = doc.createElement("id")
219 eid.appendChild(i)
220 e.appendChild(eid)
221
222 # Attempt to determine the operation that was performed and generate
223 # the entry title and content.
224 if txn["operation"] in operations:
225 op_title, op_content = operations[txn["operation"]]
226 else:
227 # XXX Better way to reflect an error? (Aborting will make a
228 # non-well-formed document.)
229 op_title = "Unknown Operation"
230 op_content = "%s was changed in the repository."
231
232 if txn["operation"] == "+":
233 # Get all FMRIs matching the current FMRI's package name.
234 matches = fmris[f.pkg_name]
235 if len(matches["versions"]) > 1:
236 # Get the oldest fmri.
237 of = matches[str(matches["versions"][0])][0]
238
239 # If the current fmri isn't the oldest one, then this
240 # is an update to the package.
241 if f != of:
242 # If there is more than one matching FMRI, and
243 # it isn't the same version as the oldest one,
244 # we can assume that this is an update to an
245 # existing package.
246 op_title, op_content = operations["U"]
247
248 # Now add a title for our entry.
249 etitle = doc.createElement("title")
250 ti = xmini.Text()
251 ti.replaceWholeText(" ".join([op_title, fmri_str]))
252 etitle.appendChild(ti)
253 e.appendChild(etitle)
254
255 # Indicate when the entry was last updated (in this case, when the
256 # package was added).
257 eu = doc.createElement("updated")
288 """
289
290 # Our configuration is stored in hours, convert it to seconds.
291 window_seconds = rcfg.get_attribute("feed", "window") * 60 * 60
292 feed_ts = datetime.datetime.fromtimestamp(t - window_seconds)
293
294 d = xmini.Document()
295
296 feed = d.createElementNS("http://www.w3.org/2005/Atom", "feed")
297 feed.setAttribute("xmlns", "http://www.w3.org/2005/Atom")
298
299 set_title(request, rcfg, d, feed, scfg.updatelog.last_update)
300
301 d.appendChild(feed)
302
303 # The feed should be presented in reverse chronological order.
304 def compare_ul_entries(a, b):
305 return cmp(ults_to_ts(a["timestamp"]),
306 ults_to_ts(b["timestamp"]))
307
308 # Get the entire catalog in the format returned by catalog.cache_fmri,
309 # so that we don't have to keep looking for possible matches.
310 fmris = {}
311 catalog.ServerCatalog.read_catalog(fmris,
312 scfg.updatelog.catalog.catalog_root)
313
314 for txn in sorted(scfg.updatelog.gen_updates_as_dictionaries(feed_ts),
315 cmp=compare_ul_entries, reverse=True):
316 add_transaction(request, scfg, rcfg, d, feed, txn, fmris)
317
318 d.writexml(cf)
319
def __get_cache_pathname(scfg):
    """Return the path of the feed cache file: CACHE_FILENAME joined
    onto the repository root taken from 'scfg'.
    """
    cache_dir = scfg.repo_root
    return os.path.join(cache_dir, CACHE_FILENAME)
322
def __clear_cache(scfg):
    """Remove the cached feed file, if one exists.

    Nothing is done when 'scfg' reports that the server is read-only.

    Raises cherrypy.HTTPError (INTERNAL_SERVER_ERROR) if the cache file
    exists but cannot be removed.
    """
    if scfg.is_read_only():
        # Ignore the request due to server configuration.
        return

    pathname = __get_cache_pathname(scfg)
    try:
        if os.path.exists(pathname):
            os.remove(pathname)
    except (IOError, OSError):
        # os.remove() signals failure with OSError, which is not a
        # subclass of IOError in Python 2; catching only IOError let
        # the failure escape without the intended HTTP error.
        raise cherrypy.HTTPError(
            httplib.INTERNAL_SERVER_ERROR,
            "Unable to clear feed cache.")
336
401
402 if need_update:
403 # Update always looks at feed.window seconds before the last
404 # update until "now." If last is none, we want it to use "now"
405 # as its starting point.
406 if last is None:
407 last = time.time()
408
409 if scfg.is_read_only():
410 # If the server is operating in readonly mode, the
411 # feed will have to be generated every time.
412 cf = cStringIO.StringIO()
413 update(request, scfg, rcfg, last, cf)
414 cf.seek(0)
415 buf = cf.read()
416 cf.close()
417
418 # Now that the feed has been generated, set the headers
419 # correctly and return it.
420 response.headers['Content-type'] = MIME_TYPE
421
422 # Return the current time and date in GMT.
423 response.headers['Last-Modified'] = rfc822.formatdate()
424
425 response.headers['Content-length'] = len(buf)
426 return buf
427 else:
428 # If the server isn't operating in readonly mode, the
429 # feed can be generated and cached in inst_dir.
430 cf = file(cfpath, "w")
431 update(request, scfg, rcfg, last, cf)
432 cf.close()
433
434 return serve_file(cfpath, MIME_TYPE)
435
|