#!/usr/bin/python2.4
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
# Use is subject to license terms.

"""feed - routines for generating RFC 4287 Atom feeds for packaging server

At present, the pkg.server.feed module provides a set of routines that, from
a catalog, allow the construction of a feed representing the activity within
a given time period."""

import cherrypy
from cherrypy.lib.static import serve_file
import cStringIO
import datetime
import httplib
import os
import rfc822
import sys
import time
import urllib
import xml.dom.minidom as xmini

from pkg.misc import get_rel_path, get_res_path
import pkg.server.catalog as catalog
import pkg.fmri as fmri
import pkg.Uuid25 as uuid

MIME_TYPE = 'application/atom+xml'
CACHE_FILENAME = "feed.xml"
RFC3339_FMT = "%Y-%m-%dT%H:%M:%SZ"


def dt_to_rfc3339_str(ts):
    """Returns a string representing a datetime object formatted according
    to RFC 3339.
    """
    # NOTE(review): RFC3339_FMT appends a literal 'Z' and strftime performs
    # no timezone conversion, so 'ts' is expected to already be in UTC --
    # confirm against callers.
    return ts.strftime(RFC3339_FMT)


def rfc3339_str_to_ts(ts_str):
    """Returns a timestamp (seconds since the epoch, as produced by
    time.mktime) representing 'ts_str', which should be in the format
    specified by RFC 3339.

    Raises ValueError if 'ts_str' does not match RFC3339_FMT.
    """
    return time.mktime(time.strptime(ts_str, RFC3339_FMT))


def rfc3339_str_to_dt(ts_str):
    """Returns a datetime object representing 'ts_str', which should be in
    the format specified by RFC 3339.

    Raises ValueError if 'ts_str' does not match RFC3339_FMT.
    """
    return datetime.datetime(*time.strptime(ts_str, RFC3339_FMT)[0:6])


def ults_to_ts(ts_str):
    """Returns a timestamp representing 'ts_str', which should be in
    updatelog format.
    """
    # Python doesn't support fractional seconds for strptime.
    ts_str = ts_str.split('.')[0]
    # Currently, updatelog entries are in local time, not UTC.
    return time.mktime(time.strptime(ts_str, "%Y-%m-%dT%H:%M:%S"))


def ults_to_rfc3339_str(ts_str):
    """Returns an RFC 3339 formatted string representing 'ts_str', which
    should be in updatelog format.
    """
    ltime = ults_to_ts(ts_str)
    # Currently, updatelog entries are in local time, not UTC, so convert
    # through gmtime before formatting.
    return dt_to_rfc3339_str(datetime.datetime(*time.gmtime(ltime)[0:6]))


def fmri_to_taguri(rcfg, f):
    """Generates a 'tag' uri compliant with RFC 4151.  Visit
    http://www.taguri.org/ for more information.

    The uri is built from the configured feed authority, the date portion
    of the FMRI's timestamp, and the unquoted url path of the FMRI.
    """
    return "tag:%s,%s:%s" % (
        rcfg.get_attribute("feed", "authority"),
        f.get_timestamp().strftime("%Y-%m-%d"),
        urllib.unquote(f.get_url_path()))


def init(scfg, rcfg):
    """This function performs general initialization work that is needed
    for feeds to work correctly.
    """

    if not scfg.is_read_only():
        # RSS/Atom feeds require a unique identifier, so
        # generate one if one isn't defined already.  This
        # needs to be a persistent value, so we only
        # generate this if we can save the configuration.
        fid = rcfg.get_attribute("feed", "id")
        if not fid:
            # Create a random UUID (type 4).
            rcfg._set_attribute("feed", "id", uuid.uuid4())

    # Ensure any configuration changes are reflected in the feed.
    __clear_cache(scfg)


def _append_text_element(doc, parent, tag, text):
    """Creates a 'tag' element containing a single text node holding
    'text', appends it to 'parent', and returns the new element.
    """
    node = xmini.Text()
    node.replaceWholeText(text)
    elem = doc.createElement(tag)
    elem.appendChild(node)
    parent.appendChild(elem)
    return elem


def set_title(request, rcfg, doc, feed, update_ts):
    """This function attaches the necessary RSS/Atom feed elements needed
    to provide title, author and contact information to the provided
    xmini document object using the provided feed object and update
    time.

    'update_ts' is a datetime object used for the feed's 'updated'
    element.
    """

    _append_text_element(doc, feed, "title",
        rcfg.get_attribute("feed", "name"))

    link = doc.createElement("link")
    link.setAttribute("href", cherrypy.url())
    link.setAttribute("rel", "self")
    feed.appendChild(link)

    # Atom requires each feed to have a permanent, universally unique
    # identifier.
    _append_text_element(doc, feed, "id",
        "urn:uuid:%s" % rcfg.get_attribute("feed", "id"))

    # Indicate when the feed was last updated.
    _append_text_element(doc, feed, "updated",
        dt_to_rfc3339_str(update_ts))

    # Add our icon.
    _append_text_element(doc, feed, "icon",
        get_res_path(request, rcfg.get_attribute("feed", "icon")))

    # Add our logo.
    _append_text_element(doc, feed, "logo",
        get_res_path(request, rcfg.get_attribute("feed", "logo")))

    maintainer = rcfg.get_attribute("repository", "maintainer")
    # The author information isn't required, but can be useful.
    if not maintainer:
        return

    addresses = rfc822.AddressList(maintainer).addresslist
    if addresses:
        name, email = addresses[0]
    else:
        # Nothing could be parsed out of the maintainer string; use
        # it verbatim as the name instead of failing on an empty list.
        name, email = maintainer, None

    if email and not name:
        # If we got an email address, but no name, then
        # the name was likely parsed as a local address.  In
        # that case, assume the whole string is the name.
        name = maintainer
        email = None

    author = doc.createElement("author")

    # First we have to add a name element.  This is required if an
    # author element exists.
    _append_text_element(doc, author, "name", name)

    if email:
        # If we were able to extract an email address from the
        # maintainer information, add the optional email
        # element to provide a point of communication.
        _append_text_element(doc, author, "email", email)

    # Done with the author.
    feed.appendChild(author)


operations = {
    "+": ["Added", "%s was added to the repository."],
    "-": ["Removed", "%s was removed from the repository."],
    "U": ["Updated", "%s, an update to an existing package, was added to "
        "the repository."]
}


def add_transaction(request, scfg, rcfg, doc, feed, txn, fmris):
    """Each transaction is an entry.  We have non-trivial content, so we
    can omit summary elements.

    'txn' is a dictionary with "catalog" (a "<tag> <fmri>" string),
    "operation" and "timestamp" (updatelog format) keys.  'fmris' maps
    package names to their known versions and is used to decide whether
    an addition is really an update to an existing package.  'scfg' is
    accepted for interface compatibility and is not used here.
    """

    entry = doc.createElement("entry")

    tag, fmri_str = txn["catalog"].split()
    f = fmri.PkgFmri(fmri_str)

    # Generate a 'tag' uri, to uniquely identify the entry, using the fmri.
    _append_text_element(doc, entry, "id", fmri_to_taguri(rcfg, f))

    # Attempt to determine the operation that was performed and generate
    # the entry title and content.
    if txn["operation"] in operations:
        op_title, op_content = operations[txn["operation"]]
    else:
        # XXX Better way to reflect an error?  (Aborting will make a
        # non-well-formed document.)
        op_title = "Unknown Operation"
        op_content = "%s was changed in the repository."

    if txn["operation"] == "+":
        # Get all FMRIs matching the current FMRI's package name.  If
        # there is no entry for this package name, keep the plain
        # "Added" wording rather than aborting the whole feed.
        try:
            matches = fmris[f.pkg_name]
        except KeyError:
            matches = None

        if matches is not None and len(matches["versions"]) > 1:
            # Get the oldest fmri.
            oldest = matches[str(matches["versions"][0])][0]

            # If there is more than one matching FMRI, and the
            # current one isn't the oldest, we can assume that
            # this is an update to an existing package.
            if f != oldest:
                op_title, op_content = operations["U"]

    # Now add a title for our entry.
    _append_text_element(doc, entry, "title",
        " ".join([op_title, fmri_str]))

    # Indicate when the entry was last updated (in this case, when the
    # package was added).
    _append_text_element(doc, entry, "updated",
        ults_to_rfc3339_str(txn["timestamp"]))

    # Link to the info output for the given package FMRI.
    e_uri = get_rel_path(request, 'info/0/%s' % f.get_url_path())

    link = doc.createElement("link")
    link.setAttribute("rel", "alternate")
    link.setAttribute("href", e_uri)
    entry.appendChild(link)

    # Using the description for the operation performed, add the FMRI and
    # tag information.
    content_text = op_content % fmri_str
    if tag == "C":
        content_text += "  This version is tagged as critical."

    _append_text_element(doc, entry, "content", content_text)

    feed.appendChild(entry)


def update(request, scfg, rcfg, t, cf):
    """Generate new Atom document for current updates and write it to
    the file-like object 'cf'.

    't' is a timestamp (seconds since the epoch); updates newer than
    't' minus the configured feed window are included.
    """

    # Our configuration is stored in hours, convert it to seconds.
    window_seconds = rcfg.get_attribute("feed", "window") * 60 * 60
    feed_ts = datetime.datetime.fromtimestamp(t - window_seconds)

    d = xmini.Document()

    feed = d.createElementNS("http://www.w3.org/2005/Atom", "feed")
    feed.setAttribute("xmlns", "http://www.w3.org/2005/Atom")

    set_title(request, rcfg, d, feed, scfg.updatelog.last_update)

    d.appendChild(feed)

    # Get the entire catalog in the format returned by catalog.cache_fmri,
    # so that we don't have to keep looking for possible matches.
    fmris = {}
    catalog.ServerCatalog.read_catalog(fmris,
        scfg.updatelog.catalog.catalog_root)

    # The feed should be presented in reverse chronological order.  A key
    # function parses each timestamp only once.
    txns = sorted(scfg.updatelog.gen_updates_as_dictionaries(feed_ts),
        key=lambda entry: ults_to_ts(entry["timestamp"]), reverse=True)

    for txn in txns:
        add_transaction(request, scfg, rcfg, d, feed, txn, fmris)

    d.writexml(cf)


def __get_cache_pathname(scfg):
    """Returns the pathname of the cached feed file, which lives in
    scfg.repo_root.
    """
    return os.path.join(scfg.repo_root, CACHE_FILENAME)


def __clear_cache(scfg):
    """Removes the cached feed file, if it exists.  Does nothing when
    the server is read-only.

    Raises cherrypy.HTTPError (500) if the file cannot be removed.
    """
    if scfg.is_read_only():
        # Ignore the request due to server configuration.
        return

    pathname = __get_cache_pathname(scfg)
    try:
        if os.path.exists(pathname):
            os.remove(pathname)
    except EnvironmentError:
        # os.remove reports failures as OSError, not IOError;
        # EnvironmentError covers both.
        raise cherrypy.HTTPError(
            httplib.INTERNAL_SERVER_ERROR,
            "Unable to clear feed cache.")


def _read_cache_updated(cfpath):
    """Returns the text of the first 'updated' child of the cached
    feed's root element, or None if the file cannot be parsed or no
    usable 'updated' element is present.
    """
    # Attempt to parse the cached copy.  If we can't, for any
    # reason, the caller will remove it and start over.
    try:
        d = xmini.parse(cfpath)
    except Exception:
        return None

    fe = d.documentElement
    if fe is None:
        return None

    for cnode in fe.childNodes:
        if cnode.nodeName == "updated":
            if cnode.childNodes:
                return cnode.childNodes[0].nodeValue
            # An empty 'updated' element carries no information.
            return None
    return None


def __cache_needs_update(scfg):
    """Checks to see if the feed cache file exists and if it is still
    valid.  Returns False, None if the cache is valid or True, last
    where last is a timestamp representing when the cache was
    generated (or None if that could not be determined).
    """
    cfpath = __get_cache_pathname(scfg)
    if not os.path.isfile(cfpath):
        return True, None

    # Get the time we last generated the feed to determine whether we
    # need to regenerate it.  If for some reason we can't get that
    # information, assume the cache is invalid, clear it, and force
    # regeneration.
    updated_str = _read_cache_updated(cfpath)
    last_ts = None
    if updated_str is not None:
        try:
            last_ts = rfc3339_str_to_dt(updated_str)
        except (TypeError, ValueError):
            last_ts = None

    if last_ts is None:
        __clear_cache(scfg)
        return True, None

    # Since our feed cache and updatelog might have been created within
    # the same second, we need to ignore small variances when determining
    # whether to update the feed cache.
    update_ts = scfg.updatelog.last_update.replace(microsecond=0)

    if last_ts >= update_ts:
        return False, None
    return True, rfc3339_str_to_ts(updated_str)


def handle(scfg, rcfg, request, response):
    """If there have been package updates since we last generated the feed,
    update the feed and send it to the client.  Otherwise, send them the
    cached copy if it is available.
    """

    cfpath = __get_cache_pathname(scfg)

    # First check to see if we already have a valid cache of the feed.
    need_update, last = __cache_needs_update(scfg)

    if need_update:
        # Update always looks at feed.window seconds before the last
        # update until "now."  If last is none, we want it to use "now"
        # as its starting point.
        if last is None:
            last = time.time()

        if scfg.is_read_only():
            # If the server is operating in readonly mode, the
            # feed will have to be generated every time.
            cf = cStringIO.StringIO()
            try:
                update(request, scfg, rcfg, last, cf)
                buf = cf.getvalue()
            finally:
                cf.close()

            # Now that the feed has been generated, set the headers
            # correctly and return it.
            response.headers['Content-type'] = MIME_TYPE

            # Return the current time and date in GMT.
            response.headers['Last-Modified'] = rfc822.formatdate()

            response.headers['Content-length'] = len(buf)
            return buf
        else:
            # If the server isn't operating in readonly mode, the
            # feed can be generated and cached in repo_root.
            cf = open(cfpath, "w")
            try:
                update(request, scfg, rcfg, last, cf)
            finally:
                # Always release the file handle, even if feed
                # generation fails part-way through.
                cf.close()

    return serve_file(cfpath, MIME_TYPE)