pkg-3166 Sdiff src/modules/server/feed.py

Print this page

3166 feed generation needs performance improvement
3306 feed returns invalid last-modified header

  18 # information: Portions Copyright [yyyy] [name of copyright owner]
  19 #
  20 # CDDL HEADER END
  21 #
  22 # Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23 # Use is subject to license terms.
  24 
  25 """feed - routines for generating RFC 4287 Atom feeds for packaging server
  26 
  27    At present, the pkg.server.feed module provides a set of routines that, from
  28    a catalog, allow the construction of a feed representing the activity within
  29    a given time period."""
  30 
  31 import cherrypy
  32 from cherrypy.lib.static import serve_file
  33 import cStringIO
  34 import datetime
  35 import httplib
  36 import os
  37 import rfc822

  38 import time
  39 import urllib
  40 import xml.dom.minidom as xmini
  41 
  42 from pkg.misc import get_rel_path, get_res_path
  43 import pkg.catalog as catalog
  44 import pkg.fmri as fmri
  45 import pkg.Uuid25 as uuid
  46 
  47 MIME_TYPE = 'application/atom+xml'
  48 CACHE_FILENAME = "feed.xml"
  49 RFC3339_FMT = "%Y-%m-%dT%H:%M:%SZ"
  50 
  51 def dt_to_rfc3339_str(ts):
  52         """Returns a string representing a datetime object formatted according
  53         to RFC 3339.
  54         """
  55         return ts.strftime(RFC3339_FMT)
  56 
  57 def rfc3339_str_to_ts(ts_str):
  58         """Returns a timestamp representing 'ts_str', which should be in the
  59         format specified by RFC 3339.
  60         """
  61         return time.mktime(time.strptime(ts_str, RFC3339_FMT))
  62 
  63 def rfc3339_str_to_dt(ts_str):

 184                 if email:
 185                         # If we were able to extract an email address from the
 186                         # maintainer information, add the optional email
 187                         # element to provide a point of communication.
 188                         e = doc.createElement("email")
 189                         et = xmini.Text()
 190                         et.replaceWholeText(email)
 191                         e.appendChild(et)
 192                         a.appendChild(e)
 193 
 194                 # Done with the author.
 195                 feed.appendChild(a)
 196 
 197 operations = {
 198         "+": ["Added", "%s was added to the repository."],
 199         "-": ["Removed", "%s was removed from the repository."],
 200         "U": ["Updated", "%s, an update to an existing package, was added to "
 201             "the repository."]
 202 }
 203 
 204 def add_transaction(request, scfg, rcfg, doc, feed, txn):
 205         """Each transaction is an entry.  We have non-trivial content, so we
 206         can omit summary elements.
 207         """
 208 
 209         e = doc.createElement("entry")
 210 
 211         tag, fmri_str = txn["catalog"].split()
 212         f = fmri.PkgFmri(fmri_str)
 213  
 214         # Generate a 'tag' uri, to uniquely identify the entry, using the fmri.
 215         i = xmini.Text()
 216         i.replaceWholeText(fmri_to_taguri(rcfg, f))
 217         eid = doc.createElement("id")
 218         eid.appendChild(i)
 219         e.appendChild(eid)
 220 
 221         # Attempt to determine the operation that was performed and generate
 222         # the entry title and content.
 223         if txn["operation"] in operations:
 224                 op_title, op_content = operations[txn["operation"]]
 225         else:
 226                 # XXX Better way to reflect an error?  (Aborting will make a
 227                 # non-well-formed document.)
 228                 op_title = "Unknown Operation"
 229                 op_content = "%s was changed in the repository."
 230 
 231         if txn["operation"] == "+":
 232                 c = scfg.updatelog.catalog
 233                 # Get all FMRIs matching the current FMRI's package name.
 234                 matches = catalog.extract_matching_fmris(c.fmris(),
 235                     f.get_name(), matcher=fmri.exact_name_match)
 236 
 237                 if len(matches) > 1:
 238                         # Get the oldest fmri (it's the last entry).
 239                         of = matches[-1]
 240 
 241                         # If the current fmri isn't the oldest one, then this
 242                         # is an update to the package.
 243                         if f != of:
 244                                 # If there is more than one matching FMRI, and
 245                                 # it isn't the same version as the oldest one,
 246                                 # we can assume that this is an update to an
 247                                 # existing package.
 248                                 op_title, op_content = operations["U"]
 249 
 250         # Now add a title for our entry.
 251         etitle = doc.createElement("title")
 252         ti = xmini.Text()
 253         ti.replaceWholeText(" ".join([op_title, fmri_str]))
 254         etitle.appendChild(ti)
 255         e.appendChild(etitle)
 256 
 257         # Indicate when the entry was last updated (in this case, when the
 258         # package was added).
 259         eu = doc.createElement("updated")

 290         """
 291 
 292         # Our configuration is stored in hours, convert it to seconds.
 293         window_seconds = rcfg.get_attribute("feed", "window") * 60 * 60
 294         feed_ts = datetime.datetime.fromtimestamp(t - window_seconds)
 295 
 296         d = xmini.Document()
 297 
 298         feed = d.createElementNS("http://www.w3.org/2005/Atom", "feed")
 299         feed.setAttribute("xmlns", "http://www.w3.org/2005/Atom")
 300 
 301         set_title(request, rcfg, d, feed, scfg.updatelog.last_update)
 302 
 303         d.appendChild(feed)
 304 
 305         # The feed should be presented in reverse chronological order.
 306         def compare_ul_entries(a, b):
 307                 return cmp(ults_to_ts(a["timestamp"]),
 308                     ults_to_ts(b["timestamp"]))
 309 






 310         for txn in sorted(scfg.updatelog.gen_updates_as_dictionaries(feed_ts),
 311             cmp=compare_ul_entries, reverse=True):
 312                 add_transaction(request, scfg, rcfg, d, feed, txn)
 313 
 314         d.writexml(cf)
 315 
 316 def __get_cache_pathname(scfg):
 317         return os.path.join(scfg.repo_root, CACHE_FILENAME)
 318 
 319 def __clear_cache(scfg):
 320         if scfg.is_read_only():
 321                 # Ignore the request due to server configuration.
 322                 return
 323 
 324         pathname = __get_cache_pathname(scfg)
 325         try:
 326                 if os.path.exists(pathname):
 327                         os.remove(pathname)
 328         except IOError:
 329                 raise cherrypy.HTTPError(
 330                     httplib.INTERNAL_SERVER_ERROR,
 331                     "Unable to clear feed cache.")
 332

 397 
 398         if need_update:
 399                 # Update always looks at feed.window seconds before the last
 400                 # update until "now."  If last is none, we want it to use "now"
 401                 # as its starting point.
 402                 if last is None:
 403                         last = time.time()
 404 
 405                 if scfg.is_read_only():
 406                         # If the server is operating in readonly mode, the
 407                         # feed will have to be generated every time.
 408                         cf = cStringIO.StringIO()
 409                         update(request, scfg, rcfg, last, cf)
 410                         cf.seek(0)
 411                         buf = cf.read()
 412                         cf.close()
 413 
 414                         # Now that the feed has been generated, set the headers
 415                         # correctly and return it.
 416                         response.headers['Content-type'] = MIME_TYPE
 417                         response.headers['Last-Modified'] = \
 418                             datetime.datetime.now().isoformat()


 419                         response.headers['Content-length'] = len(buf)
 420                         return buf
 421                 else:
 422                         # If the server isn't operating in readonly mode, the
 423                         # feed can be generated and cached in inst_dir.
 424                         cf = file(cfpath, "w")
 425                         update(request, scfg, rcfg, last, cf)
 426                         cf.close()
 427 
 428         return serve_file(cfpath, MIME_TYPE)
 429

  18 # information: Portions Copyright [yyyy] [name of copyright owner]
  19 #
  20 # CDDL HEADER END
  21 #
  22 # Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23 # Use is subject to license terms.
  24 
  25 """feed - routines for generating RFC 4287 Atom feeds for packaging server
  26 
  27    At present, the pkg.server.feed module provides a set of routines that, from
  28    a catalog, allow the construction of a feed representing the activity within
  29    a given time period."""
  30 
  31 import cherrypy
  32 from cherrypy.lib.static import serve_file
  33 import cStringIO
  34 import datetime
  35 import httplib
  36 import os
  37 import rfc822
  38 import sys
  39 import time
  40 import urllib
  41 import xml.dom.minidom as xmini
  42 
  43 from pkg.misc import get_rel_path, get_res_path
  44 import pkg.server.catalog as catalog
  45 import pkg.fmri as fmri
  46 import pkg.Uuid25 as uuid
  47 
  48 MIME_TYPE = 'application/atom+xml'
  49 CACHE_FILENAME = "feed.xml"
  50 RFC3339_FMT = "%Y-%m-%dT%H:%M:%SZ"
  51 
  52 def dt_to_rfc3339_str(ts):
  53         """Returns a string representing a datetime object formatted according
  54         to RFC 3339.
  55         """
  56         return ts.strftime(RFC3339_FMT)
  57 
  58 def rfc3339_str_to_ts(ts_str):
  59         """Returns a timestamp representing 'ts_str', which should be in the
  60         format specified by RFC 3339.
  61         """
  62         return time.mktime(time.strptime(ts_str, RFC3339_FMT))
  63 
  64 def rfc3339_str_to_dt(ts_str):

 185                 if email:
 186                         # If we were able to extract an email address from the
 187                         # maintainer information, add the optional email
 188                         # element to provide a point of communication.
 189                         e = doc.createElement("email")
 190                         et = xmini.Text()
 191                         et.replaceWholeText(email)
 192                         e.appendChild(et)
 193                         a.appendChild(e)
 194 
 195                 # Done with the author.
 196                 feed.appendChild(a)
 197 
 198 operations = {
 199         "+": ["Added", "%s was added to the repository."],
 200         "-": ["Removed", "%s was removed from the repository."],
 201         "U": ["Updated", "%s, an update to an existing package, was added to "
 202             "the repository."]
 203 }
 204 
 205 def add_transaction(request, scfg, rcfg, doc, feed, txn, fmris):
 206         """Each transaction is an entry.  We have non-trivial content, so we
 207         can omit summary elements.
 208         """
 209 
 210         e = doc.createElement("entry")
 211 
 212         tag, fmri_str = txn["catalog"].split()
 213         f = fmri.PkgFmri(fmri_str)
 214  
 215         # Generate a 'tag' uri, to uniquely identify the entry, using the fmri.
 216         i = xmini.Text()
 217         i.replaceWholeText(fmri_to_taguri(rcfg, f))
 218         eid = doc.createElement("id")
 219         eid.appendChild(i)
 220         e.appendChild(eid)
 221 
 222         # Attempt to determine the operation that was performed and generate
 223         # the entry title and content.
 224         if txn["operation"] in operations:
 225                 op_title, op_content = operations[txn["operation"]]
 226         else:
 227                 # XXX Better way to reflect an error?  (Aborting will make a
 228                 # non-well-formed document.)
 229                 op_title = "Unknown Operation"
 230                 op_content = "%s was changed in the repository."
 231 
 232         if txn["operation"] == "+":

 233                 # Get all FMRIs matching the current FMRI's package name.
 234                 matches = fmris[f.pkg_name]
 235                 if len(matches["versions"]) > 1:
 236                         # Get the oldest fmri.
 237                         of = matches[str(matches["versions"][0])][0]


 238 
 239                         # If the current fmri isn't the oldest one, then this
 240                         # is an update to the package.
 241                         if f != of:
 242                                 # If there is more than one matching FMRI, and
 243                                 # it isn't the same version as the oldest one,
 244                                 # we can assume that this is an update to an
 245                                 # existing package.
 246                                 op_title, op_content = operations["U"]
 247 
 248         # Now add a title for our entry.
 249         etitle = doc.createElement("title")
 250         ti = xmini.Text()
 251         ti.replaceWholeText(" ".join([op_title, fmri_str]))
 252         etitle.appendChild(ti)
 253         e.appendChild(etitle)
 254 
 255         # Indicate when the entry was last updated (in this case, when the
 256         # package was added).
 257         eu = doc.createElement("updated")

 288         """
 289 
 290         # Our configuration is stored in hours, convert it to seconds.
 291         window_seconds = rcfg.get_attribute("feed", "window") * 60 * 60
 292         feed_ts = datetime.datetime.fromtimestamp(t - window_seconds)
 293 
 294         d = xmini.Document()
 295 
 296         feed = d.createElementNS("http://www.w3.org/2005/Atom", "feed")
 297         feed.setAttribute("xmlns", "http://www.w3.org/2005/Atom")
 298 
 299         set_title(request, rcfg, d, feed, scfg.updatelog.last_update)
 300 
 301         d.appendChild(feed)
 302 
 303         # The feed should be presented in reverse chronological order.
 304         def compare_ul_entries(a, b):
 305                 return cmp(ults_to_ts(a["timestamp"]),
 306                     ults_to_ts(b["timestamp"]))
 307 
 308         # Get the entire catalog in the format returned by catalog.cache_fmri,
 309         # so that we don't have to keep looking for possible matches.
 310         fmris = {}
 311         catalog.ServerCatalog.read_catalog(fmris,
 312             scfg.updatelog.catalog.catalog_root)
 313 
 314         for txn in sorted(scfg.updatelog.gen_updates_as_dictionaries(feed_ts),
 315             cmp=compare_ul_entries, reverse=True):
 316                 add_transaction(request, scfg, rcfg, d, feed, txn, fmris)
 317 
 318         d.writexml(cf)
 319 
 320 def __get_cache_pathname(scfg):
 321         return os.path.join(scfg.repo_root, CACHE_FILENAME)
 322 
 323 def __clear_cache(scfg):
 324         if scfg.is_read_only():
 325                 # Ignore the request due to server configuration.
 326                 return
 327 
 328         pathname = __get_cache_pathname(scfg)
 329         try:
 330                 if os.path.exists(pathname):
 331                         os.remove(pathname)
 332         except IOError:
 333                 raise cherrypy.HTTPError(
 334                     httplib.INTERNAL_SERVER_ERROR,
 335                     "Unable to clear feed cache.")
 336

 401 
 402         if need_update:
 403                 # Update always looks at feed.window seconds before the last
 404                 # update until "now."  If last is none, we want it to use "now"
 405                 # as its starting point.
 406                 if last is None:
 407                         last = time.time()
 408 
 409                 if scfg.is_read_only():
 410                         # If the server is operating in readonly mode, the
 411                         # feed will have to be generated every time.
 412                         cf = cStringIO.StringIO()
 413                         update(request, scfg, rcfg, last, cf)
 414                         cf.seek(0)
 415                         buf = cf.read()
 416                         cf.close()
 417 
 418                         # Now that the feed has been generated, set the headers
 419                         # correctly and return it.
 420                         response.headers['Content-type'] = MIME_TYPE
 421 
 422                         # Return the current time and date in GMT.
 423                         response.headers['Last-Modified'] = rfc822.formatdate()
 424 
 425                         response.headers['Content-length'] = len(buf)
 426                         return buf
 427                 else:
 428                         # If the server isn't operating in readonly mode, the
 429                         # feed can be generated and cached in inst_dir.
 430                         cf = file(cfpath, "w")
 431                         update(request, scfg, rcfg, last, cf)
 432                         cf.close()
 433 
 434         return serve_file(cfpath, MIME_TYPE)
 435