diff --git a/py/apigen/htmlgen.py b/py/apigen/htmlgen.py
index 3e67b6e06..7b86fd99f 100644
--- a/py/apigen/htmlgen.py
+++ b/py/apigen/htmlgen.py
@@ -361,12 +361,12 @@ class ApiPageBuilder(AbstractPageBuilder):
                           H.a('source: %s' % (sourcefile,),
                               href=self.linker.get_lazyhref(sourcefile)),
                           H.br(),
-                          H.SourceDef(H.pre(callable_source)))
+                          H.SourceDef(H.pre(unicode(callable_source, 'UTF-8'))))
         elif not is_in_pkg and sourcefile and callable_source:
             csource = H.div(H.br(),
                             H.em('source: %s' % (sourcefile,)),
                             H.br(),
-                            H.SourceDef(H.pre(callable_source)))
+                            H.SourceDef(H.pre(unicode(callable_source, 'UTF-8'))))
         else:
             csource = H.SourceDef('could not get source file')
@@ -460,6 +460,8 @@ class ApiPageBuilder(AbstractPageBuilder):
                 H.Docstring(docstring or '*no docstring available*')
             )
         for dotted_name in sorted(item_dotted_names):
+            if dotted_name.startswith('_'):
+                continue
             itemname = dotted_name.split('.')[-1]
             if is_private(itemname):
                 continue
@@ -586,7 +588,7 @@ class ApiPageBuilder(AbstractPageBuilder):
         elif lastlevel and build_children:
             # XXX hack
             navitems += build_nav_level('%s.' % (dotted_name,),
-                                        depth+2)
+                                        depth+1)
         return navitems
@@ -698,9 +700,9 @@ class ApiPageBuilder(AbstractPageBuilder):
         mangled = []
         for i, sline in enumerate(str(source).split('\n')):
             if i == lineno:
-                l = '-> %s' % (sline,)
+                l = '-> %s' % (unicode(sline, 'UTF-8'),)
             else:
-                l = '   %s' % (sline,)
+                l = '   %s' % (unicode(sline, 'UTF-8'),)
             mangled.append(l)
         if sourcefile:
             linktext = '%s - line %s' % (sourcefile, line.lineno + 1)
diff --git a/py/apigen/source/html.py b/py/apigen/source/html.py
index c02941084..79d34e092 100644
--- a/py/apigen/source/html.py
+++ b/py/apigen/source/html.py
@@ -2,14 +2,13 @@
 """ html - generating ad-hoc html out of source browser
 """
+import py
 from py.xml import html, raw
 from compiler import ast
 import time
 
 from py.__.apigen.source.color import Tokenizer, PythonSchema
 
 class HtmlEnchanter(object):
-    reserved_words = ['if', 'for', 'return', 'yield']
-
     def __init__(self, mod):
         self.mod = mod
         self.create_caches()
@@ -37,8 +36,30 @@ class HtmlEnchanter(object):
         except KeyError:
             return [row] # no more info
 
+def prepare_line(text, tokenizer, encoding):
+    """ adds html formatting to text items (list)
+
+        only processes items if they're of a string type (or unicode)
+    """
+    ret = []
+    for item in text:
+        if type(item) in [str, unicode]:
+            tokens = tokenizer.tokenize(item)
+            for t in tokens:
+                data = unicode(t.data, encoding)
+                if t.type in ['keyword', 'alt_keyword', 'number',
+                              'string', 'comment']:
+                    ret.append(html.span(data, class_=t.type))
+                else:
+                    ret.append(data)
+        else:
+            ret.append(item)
+    return ret
+
 class HTMLDocument(object):
-    def __init__(self, tokenizer=None):
+    def __init__(self, encoding, tokenizer=None):
+        self.encoding = encoding
+
         self.html = root = html.html()
         self.head = head = self.create_head()
         root.append(head)
@@ -119,30 +140,11 @@ class HTMLDocument(object):
         table.append(tbody)
         return table, tbody
 
-    def prepare_line(self, text):
-        """ adds html formatting to text items (list)
-
-            only processes items if they're of a string type (or unicode)
-        """
-        ret = []
-        for item in text:
-            if type(item) in [str, unicode]:
-                tokens = self.tokenizer.tokenize(item)
-                for t in tokens:
-                    if t.type in ['keyword', 'alt_keyword', 'number',
-                                  'string', 'comment']:
-                        ret.append(html.span(t.data, class_=t.type))
-                    else:
-                        ret.append(t.data)
-            else:
-                ret.append(item)
-        return ret
-
     def add_row(self, lineno, text):
         if text == ['']:
             text = [raw('&nbsp;')]
         else:
-            text = self.prepare_line(text)
+            text = prepare_line(text, self.tokenizer, self.encoding)
         self.tbody.append(html.tr(html.td(str(lineno), class_='lineno'),
                                   html.td(class_='code', *text)))
@@ -157,7 +159,8 @@ def create_html(mod):
     lines = mod.path.open().readlines()
 
     enchanter = HtmlEnchanter(mod)
-    doc = HTMLDocument()
+    enc = get_module_encoding(mod.path)
+    doc = HTMLDocument(enc)
     for i, row in enumerate(lines):
         row = enchanter.enchant_row(i + 1, row)
         doc.add_row(i + 1, row)
@@ -248,3 +251,16 @@ def create_unknown_html(path):
     )
     return h.unicode()
 
+_reg_enc = py.std.re.compile(r'coding[:=]\s*([-\w.]+)')
+def get_module_encoding(path):
+    if hasattr(path, 'strpath'):
+        path = path.strpath
+    if path[-1] in ['c', 'o']:
+        path = path[:-1]
+    fpath = py.path.local(path)
+    code = fpath.read()
+    match = _reg_enc.search(code)
+    if match:
+        return match.group(1)
+    return 'ISO-8859-1'
+
diff --git a/py/apigen/source/testing/test_html.py b/py/apigen/source/testing/test_html.py
index 7f008b3ea..1e62ef8fe 100644
--- a/py/apigen/source/testing/test_html.py
+++ b/py/apigen/source/testing/test_html.py
@@ -1,9 +1,12 @@
+# -*- coding: UTF-8 -*-
 """ test of html generation
 """
 
-from py.__.apigen.source.html import create_html, HTMLDocument
+from py.__.apigen.source.html import prepare_line, create_html, HTMLDocument, \
+                                     get_module_encoding
 from py.__.apigen.source.browser import parse_path
+from py.__.apigen.source.color import Tokenizer, PythonSchema
 from py.xml import html
 import py
@@ -49,7 +52,7 @@ def test_basic():
 
 class _HTMLDocument(HTMLDocument):
     def __init__(self):
-        pass
+        self.encoding = 'ascii'
 
 class TestHTMLDocument(object):
     def test_head(self):
@@ -73,51 +76,8 @@ class TestHTMLDocument(object):
         assert isinstance(tbody, html.tbody)
         assert tbody == table[0]
 
-    def prepare_line(self, line, doc=None):
-        if doc is None:
-            doc = HTMLDocument()
-        l = doc.prepare_line(line)
-        return ''.join([unicode(i) for i in l])
-
-    def test_prepare_line_basic(self):
-        result = self.prepare_line(['see if this works'])
-        assert result == 'see if this works'
-        result = self.prepare_line(['see if this ',
-                                    html.a('works', name='works'), ' too'])
-        assert result == ('see if this '
-                          'works too')
-        result = self.prepare_line(['see if something else works'])
-        assert result == ('see if something '
-                          'else works')
-        result = self.prepare_line(['see if something ',
-                                    html.a('else', name='else'), ' works too'])
-        assert result == ('see if something '
-                          'else works too')
-
-    def test_prepare_line_strings(self):
-        result = self.prepare_line(['foo = "bar"'])
-        assert result == 'foo = "bar"'
-
-        result = self.prepare_line(['"spam"'])
-        assert result == '"spam"'
-
-        # test multiline strings
-        doc = HTMLDocument()
-        result = self.prepare_line(['"""start of multiline'], doc)
-        assert result == ('"""start of '
-                          'multiline')
-        # doc should now be in 'string mode'
-        result = self.prepare_line(['see if it doesn\'t touch this'], doc)
-        assert result == ('see if it doesn't touch '
-                          'this')
-        result = self.prepare_line(['"""'], doc)
-        assert result == '"""'
-        result = self.prepare_line(['see if it colours this again'], doc)
-        assert result == ('see if it colours '
-                          'this again')
-
     def test_add_row(self):
-        doc = HTMLDocument()
+        doc = HTMLDocument('ascii')
         doc.add_row(1, ['""" this is a foo implementation """'])
         doc.add_row(2, [''])
         doc.add_row(3, ['class ', html.a('Foo', name='Foo'), ':'])
@@ -141,9 +101,79 @@ class TestHTMLDocument(object):
                 '')
 
     def test_unicode(self):
-        doc = HTMLDocument()
+        doc = HTMLDocument('ascii')
         h = unicode(doc)
         print h
         assert py.std.re.match(r'\s*\s*[^<]+'
                                '.*\w*$', h, py.std.re.S)
 
+def prepare_line_helper(line, tokenizer=None, encoding='ascii'):
+    if tokenizer is None:
+        tokenizer = Tokenizer(PythonSchema)
+    l = prepare_line(line, tokenizer, encoding)
+    return ''.join([unicode(i) for i in l])
+
+def test_prepare_line_basic():
+    result = prepare_line_helper(['see if this works'])
+    assert result == 'see if this works'
+    result = prepare_line_helper(['see if this ',
+                                  html.a('works', name='works'), ' too'])
+    assert result == ('see if this '
+                      'works too')
+    result = prepare_line_helper(['see if something else works'])
+    assert result == ('see if something '
+                      'else works')
+    result = prepare_line_helper(['see if something ',
+                                  html.a('else', name='else'), ' works too'])
+    assert result == ('see if something '
+                      'else works too')
+
+def test_prepare_line_strings():
+    result = prepare_line_helper(['foo = "bar"'])
+    assert result == 'foo = "bar"'
+
+    result = prepare_line_helper(['"spam"'])
+    assert result == '"spam"'
+
+def test_prepare_line_multiline_strings():
+    # test multiline strings
+    t = Tokenizer(PythonSchema)
+    result = prepare_line_helper(['"""start of multiline'], t)
+    assert result == ('"""start of '
+                      'multiline')
+    result = prepare_line_helper(['see if it doesn\'t touch this'], t)
+    assert result == ('see if it doesn't touch '
+                      'this')
+    result = prepare_line_helper(['"""'], t)
+    assert result == '"""'
+    result = prepare_line_helper(['see if it colours this again'], t)
+    assert result == ('see if it colours '
+                      'this again')
+
+def test_prepare_line_nonascii():
+    result = prepare_line_helper(['"föö"'], encoding='UTF-8')
+    assert (result ==
+            unicode('"föö"', 'UTF-8'))
+
+def test_get_encoding_ascii():
+    temp = py.test.ensuretemp('test_get_encoding')
+    fpath = temp.join('ascii.py')
+    fpath.write(str(py.code.Source("""\
+        def foo():
+            return 'foo'
+    """)))
+    # XXX I think the specs say we have to assume latin-1 here...
+    assert get_module_encoding(fpath.strpath) == 'ISO-8859-1'
+
+def test_get_encoding_for_real():
+    temp = py.test.ensuretemp('test_get_encoding')
+    fpath = temp.join('utf-8.py')
+    fpath.write(str(py.code.Source("""\
+        #!/usr/bin/env python
+        # -*- coding: UTF-8 -*-
+
+        def foo():
+            return 'föö'
+    """)))
+    assert get_module_encoding(fpath.strpath) == 'UTF-8'
+
diff --git a/py/apigen/testing/test_apigen_functional.py b/py/apigen/testing/test_apigen_functional.py
index 3b48c9402..decf954b1 100644
--- a/py/apigen/testing/test_apigen_functional.py
+++ b/py/apigen/testing/test_apigen_functional.py
@@ -38,6 +38,8 @@ def setup_fs_project(name):
             return 'bar'
         def baz(qux):
             return qux
+        def _hidden():
+            return 'quux'
     """))
     temp.ensure("pak/__init__.py").write(py.code.Source("""\
         from py.initpkg import initpkg
@@ -77,6 +79,8 @@ def setup_fs_project(name):
        ''')
        c = compile(str(source), '', 'exec')
        exec c in globals()
+
+       assert pak.somenamespace._hidden() == 'quux'
     """))
     return temp, 'pak'
diff --git a/py/apigen/todo-apigen.txt b/py/apigen/todo-apigen.txt
index b2f2a32ee..1c9836ba8 100644
--- a/py/apigen/todo-apigen.txt
+++ b/py/apigen/todo-apigen.txt
@@ -1,5 +1,5 @@
-* format docstrings more nicely (with tests)
+* format docstrings more nicely (with tests) - DONE I guess
 
 * have the API function view be as informative as possible without
   having to go to the "single method" view
 
@@ -10,7 +10,9 @@ viewed.
   method views (when navigating there through the class view) should
   also have the source there
 
-* have class-level attributes be displayed
+  DONE I guess (todo: add syntax coloring)
+
+* have class-level attributes be displayed
 
 * use "inherited" doc strings, i.e. for
   class A:
@@ -30,11 +32,11 @@
   be separately tested and the caller should
   not need to guess what it will get, i think)
 
+  DONE
+
 * look out for and streamline all apigen/source-viewer documentation
   into one document
 
-
-
 * consider automating dependencies: e.g. something like:
 
     queue_render(page, fspath, linker, ...)
@@ -61,8 +63,22 @@
         ...
         raise
     ...
 
+  NOT SURE if this is still required
+
 * also we might have a support function for tests that fills the linker
   with "dummy hrefs" for certain types like source links
+
+  KIND OF DONE, the tests now use a linker that just doesn't
+  barf on non-existing linkids anymore, which seems to be
+  good enough (we may want to add more sophisticated debugging
+  later, but for now this works)
 
-* XXX list more here
+* add syntax coloring for Python source snippets
+
+* remove py.test/apigen cruft from stack traces
+
+* fix non-ascii source encoding support
+
+* XXX
+
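
Usage sketch (not part of the patch): the snippet below mirrors how the new
get_module_encoding() helper is meant to pick up a coding cookie and how
create_html()/prepare_line() then decode source text before building HTML. It
reuses the same regex and the same ISO-8859-1 fallback as the patch; the
guess_encoding name and the sample source string are illustrative only, and
note that, like the patch, it searches the whole file rather than only the
first two lines that PEP 263 actually allows.

    import re

    # same pattern the patch compiles via py.std.re
    _coding_re = re.compile(r'coding[:=]\s*([-\w.]+)')

    def guess_encoding(source):
        # look for a cookie such as '# -*- coding: UTF-8 -*-';
        # fall back to ISO-8859-1 like get_module_encoding() does
        match = _coding_re.search(source)
        if match:
            return match.group(1)
        return 'ISO-8859-1'

    source = "# -*- coding: UTF-8 -*-\nname = 'f\xc3\xb6\xc3\xb6'\n"
    assert guess_encoding(source) == 'UTF-8'
    # decode the raw bytes before handing them to py.xml.html,
    # which is what prepare_line() now does token by token
    decoded = unicode(source, guess_encoding(source))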