#!/usr/bin/python2.4
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.

"""feed - routines for generating RFC 4287 Atom feeds for packaging server

At present, the pkg.server.feed module provides a set of routines that, from
a catalog, allow the construction of a feed representing the activity within
a given time period."""

import cherrypy
from cherrypy.lib.static import serve_file
import cStringIO
import datetime
import httplib
import os
import rfc822
import time
import urllib
import xml.dom.minidom as xmini

from pkg.misc import get_rel_path, get_res_path
import pkg.catalog as catalog
import pkg.fmri as fmri
import pkg.Uuid25 as uuid

MIME_TYPE = 'application/atom+xml'
CACHE_FILENAME = "feed.xml"
RFC3339_FMT = "%Y-%m-%dT%H:%M:%SZ"


def dt_to_rfc3339_str(ts):
    """Returns a string representing the datetime object 'ts' formatted
    according to RFC 3339.

    No timezone conversion is performed; the trailing 'Z' in RFC3339_FMT
    is a literal character.
    """
    return ts.strftime(RFC3339_FMT)


def rfc3339_str_to_ts(ts_str):
    """Returns a timestamp (seconds since the epoch, as a float)
    representing 'ts_str', which should be in the format specified by
    RFC3339_FMT.

    Raises ValueError if 'ts_str' does not match the format.
    """
    # NOTE(review): time.mktime() interprets the parsed fields as local
    # time even though the 'Z' suffix denotes UTC.  This mirrors how the
    # feed's own "updated" value is written (see set_title), so it is
    # left as-is -- confirm before changing.
    return time.mktime(time.strptime(ts_str, RFC3339_FMT))


def rfc3339_str_to_dt(ts_str):
    """Returns a (naive) datetime object representing 'ts_str', which
    should be in the format specified by RFC3339_FMT.

    Raises ValueError if 'ts_str' does not match the format.
    """
    return datetime.datetime(*time.strptime(ts_str, RFC3339_FMT)[0:6])


def ults_to_ts(ts_str):
    """Returns a timestamp (seconds since the epoch, as a float)
    representing 'ts_str', which should be in updatelog format.
    """
    # Python doesn't support fractional seconds for strptime.
    ts_str = ts_str.split('.')[0]
    # Currently, updatelog entries are in local time, not UTC.
    return time.mktime(time.strptime(ts_str, "%Y-%m-%dT%H:%M:%S"))


def ults_to_rfc3339_str(ts_str):
    """Returns an RFC 3339 formatted string, expressed in UTC,
    representing 'ts_str', which should be in updatelog format.
    """
    ltime = ults_to_ts(ts_str)
    # Currently, updatelog entries are in local time, not UTC, so the
    # value is converted with gmtime() before being formatted.
    return dt_to_rfc3339_str(datetime.datetime(
        *time.gmtime(ltime)[0:6]))


def fmri_to_taguri(rcfg, f):
    """Generates a 'tag' uri compliant with RFC 4151 for the FMRI 'f',
    using the configured feed authority and the FMRI's timestamp.  Visit
    http://www.taguri.org/ for more information.
    """
    return "tag:%s,%s:%s" % (rcfg.get_attribute("feed",
        "authority"), f.get_timestamp().strftime("%Y-%m-%d"),
        urllib.unquote(f.get_url_path()))


def init(scfg, rcfg):
    """This function performs general initialization work that is needed
    for feeds to work correctly.
    """

    if not scfg.is_read_only():
        # RSS/Atom feeds require a unique identifier, so
        # generate one if isn't defined already.  This
        # needs to be a persistent value, so we only
        # generate this if we can save the configuration.
        fid = rcfg.get_attribute("feed", "id")
        if not fid:
            # Create a random UUID (type 4).
            rcfg._set_attribute("feed", "id", uuid.uuid4())

    # Ensure any configuration changes are reflected in the feed.
    __clear_cache(scfg)


def __append_text_element(doc, parent, name, text):
    """Creates an element called 'name' whose only child is a text node
    containing 'text', appends it to 'parent' and returns the new
    element.  A 'text' of None is treated as empty text.
    """
    if text is None:
        text = ""

    elem = doc.createElement(name)
    # createTextNode() yields a node owned by 'doc' and, unlike calling
    # replaceWholeText() on a detached Text node, copes with empty text.
    elem.appendChild(doc.createTextNode(text))
    parent.appendChild(elem)
    return elem


def set_title(request, rcfg, doc, feed, update_ts):
    """This function attaches the necessary RSS/Atom feed elements needed
    to provide title, author and contact information to the provided
    xmini document object using the provided feed object and update
    time.

    'request' is passed through to get_res_path() to build the icon and
    logo locations, 'rcfg' supplies the feed and repository attributes,
    and 'update_ts' is the datetime written as the feed's "updated"
    value.
    """

    __append_text_element(doc, feed, "title",
        rcfg.get_attribute("feed", "name"))

    link = doc.createElement("link")
    link.setAttribute("href", cherrypy.url())
    link.setAttribute("rel", "self")
    feed.appendChild(link)

    # Atom requires each feed to have a permanent, universally unique
    # identifier.
    __append_text_element(doc, feed, "id",
        "urn:uuid:%s" % rcfg.get_attribute("feed", "id"))

    # Indicate when the feed was last updated.
    __append_text_element(doc, feed, "updated",
        dt_to_rfc3339_str(update_ts))

    # Add our icon.
    __append_text_element(doc, feed, "icon",
        get_res_path(request, rcfg.get_attribute("feed", "icon")))

    # Add our logo.
    __append_text_element(doc, feed, "logo",
        get_res_path(request, rcfg.get_attribute("feed", "logo")))

    maintainer = rcfg.get_attribute("repository", "maintainer")
    # The author information isn't required, but can be useful.
    if not maintainer:
        return

    addresses = rfc822.AddressList(maintainer).addresslist
    if addresses:
        name, email = addresses[0]
    else:
        # Nothing could be parsed out of the maintainer string.
        name, email = None, None

    if not name:
        # If we got an email address, but no name, then the name was
        # likely parsed as a local address.  In that case (or if
        # nothing usable was parsed at all), assume the whole string
        # is the name.
        name = maintainer
        email = None

    author = doc.createElement("author")

    # First we have to add a name element.  This is required if an
    # author element exists.
    __append_text_element(doc, author, "name", name)

    if email:
        # If we were able to extract an email address from the
        # maintainer information, add the optional email
        # element to provide a point of communication.
        __append_text_element(doc, author, "email", email)

    # Done with the author.
    feed.appendChild(author)


# Maps an updatelog operation code to the [title, content template] used
# for the corresponding feed entry.
operations = {
    "+": ["Added", "%s was added to the repository."],
    "-": ["Removed", "%s was removed from the repository."],
    "U": ["Updated", "%s, an update to an existing package, was added to "
        "the repository."]
}


def add_transaction(request, scfg, rcfg, doc, feed, txn):
    """Each transaction is an entry.  We have non-trivial content, so we
    can omit summary elements.

    'txn' is a dictionary; its "catalog" value is split into a tag and
    an FMRI string, and its "operation" and "timestamp" values determine
    the entry's title, content and update time.  The finished entry is
    appended to 'feed'.
    """

    entry = doc.createElement("entry")

    tag, fmri_str = txn["catalog"].split()
    f = fmri.PkgFmri(fmri_str)

    # Generate a 'tag' uri, to uniquely identify the entry, using the fmri.
    __append_text_element(doc, entry, "id", fmri_to_taguri(rcfg, f))

    # Attempt to determine the operation that was performed and generate
    # the entry title and content.
    # XXX Better way to reflect an error for an unknown operation?
    # (Aborting will make a non-well-formed document.)
    op_title, op_content = operations.get(txn["operation"],
        ["Unknown Operation", "%s was changed in the repository."])

    if txn["operation"] == "+":
        c = scfg.updatelog.catalog
        # Get all FMRIs matching the current FMRI's package name.
        matches = catalog.extract_matching_fmris(c.fmris(),
            f.get_name(), matcher=fmri.exact_name_match)

        # The oldest fmri is the last entry.  If there is more than one
        # matching FMRI, and the current one isn't the oldest, we can
        # assume that this is an update to an existing package.
        if len(matches) > 1 and f != matches[-1]:
            op_title, op_content = operations["U"]

    # Now add a title for our entry.
    __append_text_element(doc, entry, "title",
        " ".join([op_title, fmri_str]))

    # Indicate when the entry was last updated (in this case, when the
    # package was added).
    __append_text_element(doc, entry, "updated",
        ults_to_rfc3339_str(txn["timestamp"]))

    # Link to the info output for the given package FMRI.
    e_uri = get_rel_path(request, 'info/0/%s' % f.get_url_path())

    link = doc.createElement("link")
    link.setAttribute("rel", "alternate")
    link.setAttribute("href", e_uri)
    entry.appendChild(link)

    # Using the description for the operation performed, add the FMRI and
    # tag information.
    content_text = op_content % fmri_str
    if tag == "C":
        content_text += "  This version is tagged as critical."

    __append_text_element(doc, entry, "content", content_text)

    feed.appendChild(entry)


def update(request, scfg, rcfg, t, cf):
    """Generate new Atom document for current updates and write it to the
    file-like object 'cf'.

    't' is a timestamp (seconds since the epoch); entries are gathered
    starting from 't' minus the configured feed window.
    """

    # Our configuration is stored in hours, convert it to seconds.
    window_seconds = rcfg.get_attribute("feed", "window") * 60 * 60
    feed_ts = datetime.datetime.fromtimestamp(t - window_seconds)

    d = xmini.Document()

    feed = d.createElementNS("http://www.w3.org/2005/Atom", "feed")
    feed.setAttribute("xmlns", "http://www.w3.org/2005/Atom")

    set_title(request, rcfg, d, feed, scfg.updatelog.last_update)

    d.appendChild(feed)

    # The feed should be presented in reverse chronological order.  A
    # key function parses each entry's timestamp only once.
    def entry_ts(txn):
        return ults_to_ts(txn["timestamp"])

    for txn in sorted(scfg.updatelog.gen_updates_as_dictionaries(feed_ts),
        key=entry_ts, reverse=True):
        add_transaction(request, scfg, rcfg, d, feed, txn)

    d.writexml(cf)


def __get_cache_pathname(scfg):
    """Returns the pathname of the cached feed file, which lives directly
    under scfg.repo_root.
    """
    return os.path.join(scfg.repo_root, CACHE_FILENAME)


def __clear_cache(scfg):
    """Removes the cached feed file, if there is one.  Does nothing when
    the server is read-only.

    Raises cherrypy.HTTPError (500) if the file exists but cannot be
    removed.
    """
    if scfg.is_read_only():
        # Ignore the request due to server configuration.
        return

    pathname = __get_cache_pathname(scfg)
    try:
        os.remove(pathname)
    except EnvironmentError:
        # os.remove() raises OSError (not IOError).  A file that is
        # already gone -- never created, or removed by a concurrent
        # request -- is not an error; only fail if it is still there.
        if os.path.exists(pathname):
            raise cherrypy.HTTPError(
                httplib.INTERNAL_SERVER_ERROR,
                "Unable to clear feed cache.")


def __get_cached_update_str(cfpath):
    """Returns the text of the "updated" element found directly under the
    root element of the cached feed at 'cfpath', or None if the file
    cannot be parsed or contains no such text.
    """
    # Attempt to parse the cached copy.  If we can't, for any reason,
    # the caller will remove it and start over.
    try:
        d = xmini.parse(cfpath)
    except Exception:
        return None

    fe = d.documentElement
    if fe is None:
        return None

    for cnode in fe.childNodes:
        if cnode.nodeName == "updated":
            if cnode.firstChild is None:
                # An empty <updated/> element carries no usable value.
                return None
            return cnode.firstChild.nodeValue
    return None


def __cache_needs_update(scfg):
    """Checks to see if the feed cache file exists and if it is still
    valid.  Returns a (need_update, last) tuple:

        (False, None)   the cache is valid and can be served as-is.
        (True, last)    the cache is out of date; 'last' is a timestamp
                        representing when the cache was generated.
        (True, None)    there is no usable cache.  If a cache file was
                        present but unreadable, it is cleared first.
    """
    cfpath = __get_cache_pathname(scfg)
    if not os.path.isfile(cfpath):
        return True, None

    # Attempt to get the time we last generated the feed to determine
    # whether we need to regenerate it.  If for some reason we can't get
    # that information, assume the cache is invalid, clear it, and force
    # regeneration.
    last_ts = None
    updated_str = __get_cached_update_str(cfpath)
    if updated_str is not None:
        try:
            last_ts = rfc3339_str_to_dt(updated_str)
        except ValueError:
            last_ts = None

    if last_ts is None:
        __clear_cache(scfg)
        return True, None

    # Since our feed cache and updatelog might have been created within
    # the same second, we need to ignore small variances when determining
    # whether to update the feed cache.
    update_ts = scfg.updatelog.last_update.replace(microsecond=0)

    if last_ts >= update_ts:
        return False, None
    return True, rfc3339_str_to_ts(updated_str)


def handle(scfg, rcfg, request, response):
    """If there have been package updates since we last generated the feed,
    update the feed and send it to the client.  Otherwise, send them the
    cached copy if it is available.

    Returns the feed body: a string when the server is read-only and the
    feed had to be generated in memory, otherwise whatever serve_file()
    returns for the cached file.
    """

    cfpath = __get_cache_pathname(scfg)

    # First check to see if we already have a valid cache of the feed.
    need_update, last = __cache_needs_update(scfg)

    if need_update:
        # Update always looks at feed.window seconds before the last
        # update until "now."  If last is none, we want it to use "now"
        # as its starting point.
        if last is None:
            last = time.time()

        if scfg.is_read_only():
            # If the server is operating in readonly mode, the
            # feed will have to be generated every time.
            cf = cStringIO.StringIO()
            try:
                update(request, scfg, rcfg, last, cf)
                buf = cf.getvalue()
            finally:
                cf.close()

            # Now that the feed has been generated, set the headers
            # correctly and return it.  Last-Modified must be an
            # HTTP-date (RFC 1123 format), not an ISO 8601 string.
            response.headers['Content-type'] = MIME_TYPE
            response.headers['Last-Modified'] = rfc822.formatdate()
            response.headers['Content-length'] = len(buf)
            return buf
        else:
            # If the server isn't operating in readonly mode, the
            # feed can be generated and cached under repo_root.
            cf = open(cfpath, "w")
            try:
                update(request, scfg, rcfg, last, cf)
            finally:
                # Never leak the handle, even if generation fails.
                cf.close()

    return serve_file(cfpath, MIME_TYPE)