1 #!/usr/bin/python2.4
2 #
3 # CDDL HEADER START
4 #
5 # The contents of this file are subject to the terms of the
6 # Common Development and Distribution License (the "License").
7 # You may not use this file except in compliance with the License.
8 #
9 # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 # or http://www.opensolaris.org/os/licensing.
11 # See the License for the specific language governing permissions
12 # and limitations under the License.
13 #
14 # When distributing Covered Code, include this CDDL HEADER in each
15 # file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 # If applicable, add the following below this CDDL HEADER, with the
17 # fields enclosed by brackets "[]" replaced with your own identifying
18 # information: Portions Copyright [yyyy] [name of copyright owner]
19 #
20 # CDDL HEADER END
21 #
22 # Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 # Use is subject to license terms.
24
25 """feed - routines for generating RFC 4287 Atom feeds for packaging server
26
27 At present, the pkg.server.feed module provides a set of routines that, from
28 a catalog, allow the construction of a feed representing the activity within
29 a given time period."""
30
31 import cherrypy
32 from cherrypy.lib.static import serve_file
33 import cStringIO
34 import datetime
35 import httplib
36 import os
37 import rfc822
38 import sys
39 import time
40 import urllib
41 import xml.dom.minidom as xmini
42
43 from pkg.misc import get_rel_path, get_res_path
44 import pkg.server.catalog as catalog
45 import pkg.fmri as fmri
46 import pkg.Uuid25 as uuid
47
# Content type reported to clients when the feed is served.
MIME_TYPE = "application/atom+xml"

# Name of the cached feed document; it is joined onto scfg.repo_root.
CACHE_FILENAME = 'feed.xml'

# strftime()/strptime() pattern used for RFC 3339 timestamps.  It has
# one-second resolution and always ends in a literal "Z".
RFC3339_FMT = '%Y-%m-%dT%H:%M:%SZ'
51
def dt_to_rfc3339_str(ts):
    """Formats the datetime object 'ts' with RFC3339_FMT and returns the
    resulting string.  The fields of 'ts' are used as-is; no timezone
    conversion is performed here.
    """
    formatted = ts.strftime(RFC3339_FMT)
    return formatted
57
def rfc3339_str_to_ts(ts_str):
    """Parses 'ts_str', which must match RFC3339_FMT, and returns the
    corresponding timestamp in seconds as a float.
    """
    # NOTE(review): time.mktime() treats the parsed fields as local time,
    # even though the format carries a "Z" suffix -- confirm that callers
    # expect this.
    parsed = time.strptime(ts_str, RFC3339_FMT)
    return time.mktime(parsed)
63
def rfc3339_str_to_dt(ts_str):
    """Parses 'ts_str', which must match RFC3339_FMT, and returns a naive
    datetime object built from its year through second fields.
    """
    fields = time.strptime(ts_str, RFC3339_FMT)
    year, month, day, hour, minute, second = fields[0:6]
    return datetime.datetime(year, month, day, hour, minute, second)
69
def ults_to_ts(ts_str):
    """Converts 'ts_str', a timestamp string in updatelog format
    (YYYY-MM-DDTHH:MM:SS with an optional fractional part), to a
    timestamp in seconds.  Any fractional seconds are discarded.
    """
    # strptime() has no directive for fractional seconds, so keep only
    # what precedes the first '.'.
    whole_seconds = ts_str.split('.', 1)[0]
    parsed = time.strptime(whole_seconds, "%Y-%m-%dT%H:%M:%S")

    # Updatelog entries are currently recorded in local time, not UTC,
    # which is the interpretation mktime() applies.
    return time.mktime(parsed)
78
def ults_to_rfc3339_str(ts_str):
    """Returns an RFC 3339 formatted string for 'ts_str', which should be
    in updatelog format.
    """
    # Updatelog entries are currently in local time, not UTC, so go
    # through a timestamp and break it back down with gmtime().
    utc_fields = time.gmtime(ults_to_ts(ts_str))
    utc_dt = datetime.datetime(*utc_fields[0:6])
    return dt_to_rfc3339_str(utc_dt)
87
def fmri_to_taguri(rcfg, f):
    """Builds a 'tag' uri (RFC 4151; see http://www.taguri.org/) for the
    fmri 'f'.  The uri is assembled from the feed authority configured in
    'rcfg', the date portion of the fmri's timestamp, and the fmri's
    unquoted url path.
    """
    authority = rcfg.get_attribute("feed", "authority")
    date = f.get_timestamp().strftime("%Y-%m-%d")
    path = urllib.unquote(f.get_url_path())
    return "tag:%s,%s:%s" % (authority, date, path)
95
def init(scfg, rcfg):
    """Performs the one-time setup needed for feeds to work correctly:
    makes sure the feed has an id when the configuration is writable,
    then discards any cached feed.
    """

    # RSS/Atom feeds require a unique identifier.  It has to be a
    # persistent value, so one is only generated when the server is not
    # read-only and none has been defined yet.
    writable = not scfg.is_read_only()
    if writable and not rcfg.get_attribute("feed", "id"):
        # Create a random UUID (type 4).
        rcfg._set_attribute("feed", "id", uuid.uuid4())

    # Ensure any configuration changes are reflected in the feed.
    __clear_cache(scfg)
113
def _append_text_element(doc, parent, tag, text):
    """Creates a 'tag' element holding 'text', appends it to 'parent', and
    returns the new element.  When 'text' is empty, the element is
    appended without a text child.
    """
    elem = doc.createElement(tag)
    if text:
        # replaceWholeText() cannot be given empty content on a Text
        # node that has no parent yet, so a text child is only created
        # when there is something to put in it.
        node = xmini.Text()
        node.replaceWholeText(text)
        elem.appendChild(node)
    parent.appendChild(elem)
    return elem

def set_title(request, rcfg, doc, feed, update_ts):
    """This function attaches the RSS/Atom feed elements needed to provide
    title, author and contact information to 'feed'.

    'request' is passed to get_res_path() to build the icon and logo
    locations.  'rcfg' supplies the feed name, id, icon and logo, and
    the repository maintainer.  'doc' is the xmini document used to
    create elements, 'feed' is the element they are appended to, and
    'update_ts' is the datetime written to the "updated" element.

    Elements are appended in this order: title, link, id, updated,
    icon, logo, and (only if a maintainer is configured) author.
    """

    _append_text_element(doc, feed, "title",
        rcfg.get_attribute("feed", "name"))

    self_link = doc.createElement("link")
    self_link.setAttribute("href", cherrypy.url())
    self_link.setAttribute("rel", "self")
    feed.appendChild(self_link)

    # Atom requires each feed to have a permanent, universally unique
    # identifier.
    _append_text_element(doc, feed, "id",
        "urn:uuid:%s" % rcfg.get_attribute("feed", "id"))

    # Indicate when the feed was last updated.
    _append_text_element(doc, feed, "updated",
        dt_to_rfc3339_str(update_ts))

    # Add our icon.
    _append_text_element(doc, feed, "icon",
        get_res_path(request, rcfg.get_attribute("feed", "icon")))

    # Add our logo.
    _append_text_element(doc, feed, "logo",
        get_res_path(request, rcfg.get_attribute("feed", "logo")))

    # The author information isn't required, but can be useful.
    maintainer = rcfg.get_attribute("repository", "maintainer")
    if not maintainer:
        return

    # The maintainer string may not contain anything that parses as an
    # address, in which case the address list is empty.
    addresses = rfc822.AddressList(maintainer).addresslist
    if addresses:
        name, email = addresses[0]
    else:
        name, email = None, None

    if not name:
        # If we got an email address, but no name, then the name was
        # likely parsed as a local address.  In that case (and when
        # nothing could be parsed at all), assume the whole string is
        # the name.
        name = maintainer
        email = None

    author = doc.createElement("author")

    # A name element is required if an author element exists.
    _append_text_element(doc, author, "name", name)

    if email:
        # If we were able to extract an email address from the
        # maintainer information, add the optional email element to
        # provide a point of communication.
        _append_text_element(doc, author, "email", email)

    # Done with the author.
    feed.appendChild(author)
197
# Maps an updatelog operation code to a [title prefix, content template]
# pair.  The content template takes the fmri string as its single
# argument.
operations = {
    "+": [
        "Added",
        "%s was added to the repository.",
    ],
    "-": [
        "Removed",
        "%s was removed from the repository.",
    ],
    "U": [
        "Updated",
        "%s, an update to an existing package, "
        "was added to the repository.",
    ],
}
204
def add_transaction(request, scfg, rcfg, doc, feed, txn, fmris):
    """Appends an Atom entry describing the updatelog transaction 'txn'
    to 'feed'.  Each transaction is an entry.  We have non-trivial
    content, so we can omit summary elements.

    'txn' is read for its "catalog", "operation" and "timestamp" keys.
    'fmris' is indexed by package name and is only consulted for "+"
    operations, to tell a new package from an update to an existing
    one.  The entry receives id, title, updated, link and content
    children, in that order.
    """

    entry = doc.createElement("entry")

    tag, fmri_str = txn["catalog"].split()
    pfmri = fmri.PkgFmri(fmri_str)

    # A 'tag' uri built from the fmri uniquely identifies the entry.
    id_text = xmini.Text()
    id_text.replaceWholeText(fmri_to_taguri(rcfg, pfmri))
    id_elem = doc.createElement("id")
    id_elem.appendChild(id_text)
    entry.appendChild(id_elem)

    # Determine the operation that was performed in order to pick the
    # entry title and content.
    # XXX Better way to reflect an error for unknown operations?
    # (Aborting will make a non-well-formed document.)
    op = txn["operation"]
    op_title, op_content = operations.get(op, ["Unknown Operation",
        "%s was changed in the repository."])

    if op == "+":
        # Look at all FMRIs sharing the current FMRI's package name.
        # If there is more than one version and the current fmri is
        # not the oldest one, this is an update to an existing
        # package rather than a new package.
        matches = fmris[pfmri.pkg_name]
        versions = matches["versions"]
        if len(versions) > 1 and \
            pfmri != matches[str(versions[0])][0]:
            op_title, op_content = operations["U"]

    # Now add a title for our entry.
    title_text = xmini.Text()
    title_text.replaceWholeText(" ".join([op_title, fmri_str]))
    title_elem = doc.createElement("title")
    title_elem.appendChild(title_text)
    entry.appendChild(title_elem)

    # Indicate when the entry was last updated (in this case, when the
    # package was added).
    updated_text = xmini.Text()
    updated_text.replaceWholeText(ults_to_rfc3339_str(txn["timestamp"]))
    updated_elem = doc.createElement("updated")
    updated_elem.appendChild(updated_text)
    entry.appendChild(updated_elem)

    # Link to the info output for the given package FMRI.
    info_uri = get_rel_path(request, 'info/0/%s' % pfmri.get_url_path())
    link_elem = doc.createElement("link")
    link_elem.setAttribute("rel", "alternate")
    link_elem.setAttribute("href", info_uri)
    entry.appendChild(link_elem)

    # Describe the operation performed on the FMRI, and mention the
    # tag when it marks the version as critical.
    body = op_content % fmri_str
    if tag == "C":
        body += " This version is tagged as critical."

    content_text = xmini.Text()
    content_text.replaceWholeText(body)
    content_elem = doc.createElement("content")
    content_elem.appendChild(content_text)
    entry.appendChild(content_elem)

    feed.appendChild(entry)
284
def update(request, scfg, rcfg, t, cf):
    """Generates a new Atom document for the current updates and writes
    it to the file-like object 'cf'.

    't' is a timestamp in seconds; the updatelog is asked for updates
    starting at 't' minus the configured feed window.
    """

    # The feed window is configured in hours; convert it to seconds.
    window_hours = rcfg.get_attribute("feed", "window")
    feed_ts = datetime.datetime.fromtimestamp(t - window_hours * 60 * 60)

    atom_ns = "http://www.w3.org/2005/Atom"
    doc = xmini.Document()
    feed = doc.createElementNS(atom_ns, "feed")
    feed.setAttribute("xmlns", atom_ns)

    set_title(request, rcfg, doc, feed, scfg.updatelog.last_update)

    doc.appendChild(feed)

    # Get the entire catalog in the format returned by
    # catalog.cache_fmri, so that we don't have to keep looking for
    # possible matches.
    fmris = {}
    catalog.ServerCatalog.read_catalog(fmris,
        scfg.updatelog.catalog.catalog_root)

    # The feed should be presented in reverse chronological order.
    def entry_ts(entry):
        return ults_to_ts(entry["timestamp"])

    txns = sorted(scfg.updatelog.gen_updates_as_dictionaries(feed_ts),
        key=entry_ts, reverse=True)
    for txn in txns:
        add_transaction(request, scfg, rcfg, doc, feed, txn, fmris)

    doc.writexml(cf)
319
def __get_cache_pathname(scfg):
    """Returns the pathname of the cached feed file, which is
    CACHE_FILENAME inside scfg.repo_root.
    """
    root = scfg.repo_root
    return os.path.join(root, CACHE_FILENAME)
322
def __clear_cache(scfg):
    """Removes the cached feed file if it exists.  Nothing is done when
    the server is operating in read-only mode.

    Raises cherrypy.HTTPError (internal server error) if the file
    exists but cannot be removed.
    """
    if scfg.is_read_only():
        # Ignore the request due to server configuration.
        return

    pathname = __get_cache_pathname(scfg)
    try:
        if os.path.exists(pathname):
            os.remove(pathname)
    except EnvironmentError:
        # os.remove() reports failures as OSError, which is not an
        # IOError; EnvironmentError is the common base class of
        # both, so either kind of failure becomes an HTTP error.
        raise cherrypy.HTTPError(
            httplib.INTERNAL_SERVER_ERROR,
            "Unable to clear feed cache.")
336
def __cache_needs_update(scfg):
    """Checks to see if the feed cache file exists and if it is still
    valid.

    Returns a (need_update, last) pair:

        (False, None)  the cache is at least as new as the updatelog's
                       last update and can be served as-is.

        (True, last)   the cache is older than the updatelog's last
                       update; 'last' is the timestamp, in seconds,
                       taken from the cached feed's "updated" element.

        (True, None)   there is no cache file, or it could not be
                       parsed or did not contain a usable "updated"
                       element.  In the latter cases the cache is
                       cleared so that it will be regenerated.
    """
    cfpath = __get_cache_pathname(scfg)
    if not os.path.isfile(cfpath):
        # Nothing has been cached yet, so there is nothing to clear.
        return True, None

    # Attempt to parse the cached copy.  If we can't, for any reason,
    # assume we need to remove it and start over.
    try:
        d = xmini.parse(cfpath)
    except Exception:
        d = None

    # Find the text of the feed element's "updated" child, which
    # records when the feed was last generated.
    updated_str = None
    if d:
        for cnode in d.childNodes[0].childNodes:
            if cnode.nodeName == "updated":
                # An empty element has no text child to read.
                if cnode.childNodes:
                    updated_str = cnode.childNodes[0].nodeValue
                break

    last_ts = None
    if updated_str:
        try:
            last_ts = rfc3339_str_to_dt(updated_str)
        except (TypeError, ValueError):
            # The recorded time isn't in the expected format.
            last_ts = None

    if last_ts is None:
        # We can't tell when the cache was generated, so assume it is
        # invalid, clear it, and force regeneration.
        __clear_cache(scfg)
        return True, None

    # Since our feed cache and updatelog might have been created
    # within the same second, we need to ignore small variances when
    # determining whether to update the feed cache.
    update_ts = scfg.updatelog.last_update.replace(microsecond=0)
    if last_ts >= update_ts:
        return False, None

    return True, rfc3339_str_to_ts(updated_str)
390
def handle(scfg, rcfg, request, response):
    """If there have been package updates since we last generated the
    feed, update the feed and send it to the client.  Otherwise, send
    them the cached copy if it is available.

    When the server is read-only and the feed has to be regenerated,
    the feed is built in memory, the Content-type, Last-Modified and
    Content-length headers are set on 'response', and the feed text is
    returned.  In every other case the result of serve_file() for the
    cache file is returned.
    """

    cfpath = __get_cache_pathname(scfg)

    # First check to see if we already have a valid cache of the feed.
    need_update, last = __cache_needs_update(scfg)

    if not need_update:
        return serve_file(cfpath, MIME_TYPE)

    # Update always looks at feed.window seconds before the last update
    # until "now."  If last is none, we want it to use "now" as its
    # starting point.
    if last is None:
        last = time.time()

    if scfg.is_read_only():
        # If the server is operating in readonly mode, the feed will
        # have to be generated every time.
        cf = cStringIO.StringIO()
        try:
            update(request, scfg, rcfg, last, cf)
            buf = cf.getvalue()
        finally:
            # Release the buffer even if generation fails.
            cf.close()

        # Now that the feed has been generated, set the headers
        # correctly and return it.
        response.headers['Content-type'] = MIME_TYPE

        # Return the current time and date in GMT.
        response.headers['Last-Modified'] = rfc822.formatdate()

        response.headers['Content-length'] = len(buf)
        return buf

    # If the server isn't operating in readonly mode, the feed can be
    # generated and cached under scfg.repo_root.
    cf = open(cfpath, "w")
    try:
        update(request, scfg, rcfg, last, cf)
    finally:
        # Don't leak the file handle if generation fails.
        cf.close()

    return serve_file(cfpath, MIME_TYPE)
435