LaTeX->XML->SQL - critique

Sean McIlroy

22/05/2005 à 06:06

# -*- coding: Latin-1 -*-

from cStringIO import StringIO
import re

def xReplace(t,a,b):
    """Apply a series of substring replacements to ``t``, in order.

    Each ``a[k]`` is replaced by ``b[k]``; later replacements see the result
    of earlier ones.

    Parameters:
        t: the string to transform.
        a: sequence of substrings to look for.
        b: sequence of replacement strings; entries beyond ``len(a)`` are
           ignored.

    Returns:
        The transformed string (``t`` itself when ``a`` is empty).

    Raises:
        IndexError: if ``b`` is shorter than ``a``.
    """
    # Iterative rather than recursive so long replacement lists cannot hit
    # the interpreter's recursion limit.
    for k, old in enumerate(a):
        t = t.replace(old, b[k])
    return t

def remplacer(t,c,f):
    """Rewrite every ``c{...}`` construct in ``t`` using ``f``.

    For each occurrence of the marker ``c``, the text from the marker up to
    the first ``}`` that follows the next ``{`` is replaced by ``f(inner)``,
    where ``inner`` is the text between those braces. Nested braces are not
    understood.

    Parameters:
        t: the text to transform.
        c: the marker string introducing a braced argument.
        f: callable taking the braced text and returning its replacement.

    Returns:
        The transformed text. Text after an unterminated construct is left
        untouched.
    """
    pieces = []
    pos = 0
    # Scan forward only: replacement text is never rescanned, so the loop
    # terminates even when f's output contains the marker again.
    while True:
        i = t.find(c, pos)
        if i < 0:
            break
        j = t.find('{', i)
        if j < 0:
            break
        k = t.find('}', j)
        if k < 0:
            break
        pieces.append(t[pos:i])
        pieces.append(f(t[j + 1:k]))
        pos = k + 1
    pieces.append(t[pos:])
    return ''.join(pieces)

def pardeux(iterable, n=2):
    """Yield successive non-overlapping groups of ``n`` items as tuples.

    Parameters:
        iterable: any iterable.
        n: group size (default 2, i.e. pairs).

    Yields:
        Tuples of exactly ``n`` consecutive items. A trailing group with
        fewer than ``n`` items is dropped.

    Raises:
        ValueError: if ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError('n must be at least 1')
    it = iter(iterable)
    while True:
        group = []
        for item in it:
            group.append(item)
            if len(group) == n:
                break
        if len(group) < n:
            # Input exhausted: end the generator explicitly instead of
            # letting StopIteration escape from the generator body.
            return
        yield tuple(group)

def normalize_whitespace(text):
    """Collapse every run of whitespace in ``text`` to a single space.

    Leading and trailing whitespace is removed.
    """
    words = text.split()
    return ' '.join(words)

# Conversion script: reads a LaTeX FAQ and writes it as XML to out.xml.
# NOTE(review): the backslash escapes below were reconstructed from context
# ('\n', '\\begin', '\1', ...); confirm them against the author's own file.

file_nom = '/home/remi/droit_tice/faq_gest.tex'

# Read the LaTeX source, dropping lines that start with a '%' comment marker.
source = open(file_nom, 'r')
try:
    file_string = ''.join([x for x in source if not x.startswith('%')])
finally:
    source.close()

# Regex capturing everything between \begin{document} and \end{document}.
corps = r"""\\begin{document}(?P<contents>.*)\\end{document}"""
# LaTeX list environments and, at the same index, the HTML tag replacing them.
r0 = ['\\begin{enumerate}', '\\end{enumerate}',
      '\\begin{itemize}', '\\end{itemize}']
r1 = ['<ol>', '</ol>', '<ul>', '</ul>']

match = re.compile(corps, re.IGNORECASE | re.DOTALL).search(file_string)
if match is None:
    raise ValueError('no \\begin{document}...\\end{document} body found in %s'
                     % file_nom)
contents = match.group('contents')
contents = xReplace(contents, r0, r1)
contents = remplacer(contents, '\\url',
                     lambda x: '\n<a href = "' + x + '">' + x + '</a>')
# Each single-line \item becomes an HTML list element.
contents = re.compile(r'\\item (.*?)\n').sub(r'<li>\1</li>\n', contents)
# Backslash-escape apostrophes (presumably for the later SQL step -- confirm).
contents = contents.replace("'", "\\'")
contents = remplacer(contents, '\\textit', lambda x: '\n<i>' + x + '</i>')

# Splitting on a pattern with a capturing group interleaves titles and
# bodies: [introduction, title1, body1, title2, body2, ...].
contents_list = re.compile(r'\\section{(.*)}').split(contents)
subsection_re = re.compile(r'\\subsection{(.*)}')

fout = StringIO()
fout.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
fout.write('\n')
fout.write('<document>\n')
fout.write('<introduction>%s</introduction>\n'
           % normalize_whitespace(contents_list[0]))

# i numbers the sections; j numbers the subsections across the whole
# document (it is not reset for each section).
i = j = 1
for titre, contenu in pardeux(contents_list[1:]):
    contenus_sections = subsection_re.split(contenu)
    # NOTE(review): titles and descriptions are written into attribute
    # values without XML escaping.
    fout.write('<section id="%s" name="%s" description="%s">\n'
               % (i, titre, normalize_whitespace(contenus_sections[0])))
    for titre_ssect, contenu_sect in pardeux(contenus_sections[1:]):
        fout.write('<sousSection id="%s" thema="%s">'
                   % (j, normalize_whitespace(titre_ssect)))
        fout.write('<![CDATA[%s]]>' % normalize_whitespace(contenu_sect))
        fout.write('</sousSection>\n')
        j = j + 1
    fout.write('</section>\n')
    i = i + 1
fout.write('</document>')

# Write the result, closing the file even if the write fails.
sortie = open('out.xml', 'w')
try:
    sortie.write(fout.getvalue())
finally:
    sortie.close()

# -*- coding: Latin-1 -*-

from cStringIO import StringIO
import re

def xReplace(t,a,b):
    """Apply a series of substring replacements to ``t``, in order.

    Each ``a[k]`` is replaced by ``b[k]``; later replacements see the result
    of earlier ones.

    Parameters:
        t: the string to transform.
        a: sequence of substrings to look for.
        b: sequence of replacement strings; entries beyond ``len(a)`` are
           ignored.

    Returns:
        The transformed string (``t`` itself when ``a`` is empty).

    Raises:
        IndexError: if ``b`` is shorter than ``a``.
    """
    # Iterative rather than recursive so long replacement lists cannot hit
    # the interpreter's recursion limit.
    for k, old in enumerate(a):
        t = t.replace(old, b[k])
    return t

def remplacer(t,c,f):
    """Rewrite every ``c{...}`` construct in ``t`` using ``f``.

    For each occurrence of the marker ``c``, the text from the marker up to
    the first ``}`` that follows the next ``{`` is replaced by ``f(inner)``,
    where ``inner`` is the text between those braces. Nested braces are not
    understood.

    Parameters:
        t: the text to transform.
        c: the marker string introducing a braced argument.
        f: callable taking the braced text and returning its replacement.

    Returns:
        The transformed text. Text after an unterminated construct is left
        untouched.
    """
    pieces = []
    pos = 0
    # Scan forward only: replacement text is never rescanned, so the loop
    # terminates even when f's output contains the marker again.
    while True:
        i = t.find(c, pos)
        if i < 0:
            break
        j = t.find('{', i)
        if j < 0:
            break
        k = t.find('}', j)
        if k < 0:
            break
        pieces.append(t[pos:i])
        pieces.append(f(t[j + 1:k]))
        pos = k + 1
    pieces.append(t[pos:])
    return ''.join(pieces)

def pardeux(iterable, n=2):
    """Yield successive non-overlapping groups of ``n`` items as tuples.

    Parameters:
        iterable: any iterable.
        n: group size (default 2, i.e. pairs).

    Yields:
        Tuples of exactly ``n`` consecutive items. A trailing group with
        fewer than ``n`` items is dropped.

    Raises:
        ValueError: if ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError('n must be at least 1')
    it = iter(iterable)
    while True:
        group = []
        for item in it:
            group.append(item)
            if len(group) == n:
                break
        if len(group) < n:
            # Input exhausted: end the generator explicitly instead of
            # letting StopIteration escape from the generator body.
            return
        yield tuple(group)

def normalize_whitespace(text):
    """Collapse every run of whitespace in ``text`` to a single space.

    Leading and trailing whitespace is removed.
    """
    words = text.split()
    return ' '.join(words)

# Conversion script: reads a LaTeX FAQ and writes it as XML to out.xml.
# NOTE(review): the backslash escapes below were reconstructed from context
# ('\n', '\\begin', '\1', ...); confirm them against the author's own file.

file_nom = '/home/remi/droit_tice/faq_gest.tex'

# Read the LaTeX source, dropping lines that start with a '%' comment marker.
source = open(file_nom, 'r')
try:
    file_string = ''.join([x for x in source if not x.startswith('%')])
finally:
    source.close()

# Regex capturing everything between \begin{document} and \end{document}.
corps = r"""\\begin{document}(?P<contents>.*)\\end{document}"""
# LaTeX list environments and, at the same index, the HTML tag replacing them.
r0 = ['\\begin{enumerate}', '\\end{enumerate}',
      '\\begin{itemize}', '\\end{itemize}']
r1 = ['<ol>', '</ol>', '<ul>', '</ul>']

match = re.compile(corps, re.IGNORECASE | re.DOTALL).search(file_string)
if match is None:
    raise ValueError('no \\begin{document}...\\end{document} body found in %s'
                     % file_nom)
contents = match.group('contents')
contents = xReplace(contents, r0, r1)
contents = remplacer(contents, '\\url',
                     lambda x: '\n<a href = "' + x + '">' + x + '</a>')
# Each single-line \item becomes an HTML list element.
contents = re.compile(r'\\item (.*?)\n').sub(r'<li>\1</li>\n', contents)
# Backslash-escape apostrophes (presumably for the later SQL step -- confirm).
contents = contents.replace("'", "\\'")
contents = remplacer(contents, '\\textit', lambda x: '\n<i>' + x + '</i>')

# Splitting on a pattern with a capturing group interleaves titles and
# bodies: [introduction, title1, body1, title2, body2, ...].
contents_list = re.compile(r'\\section{(.*)}').split(contents)
subsection_re = re.compile(r'\\subsection{(.*)}')

fout = StringIO()
fout.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
fout.write('\n')
fout.write('<document>\n')
fout.write('<introduction>%s</introduction>\n'
           % normalize_whitespace(contents_list[0]))

# i numbers the sections; j numbers the subsections across the whole
# document (it is not reset for each section).
i = j = 1
for titre, contenu in pardeux(contents_list[1:]):
    contenus_sections = subsection_re.split(contenu)
    # NOTE(review): titles and descriptions are written into attribute
    # values without XML escaping.
    fout.write('<section id="%s" name="%s" description="%s">\n'
               % (i, titre, normalize_whitespace(contenus_sections[0])))
    for titre_ssect, contenu_sect in pardeux(contenus_sections[1:]):
        fout.write('<sousSection id="%s" thema="%s">'
                   % (j, normalize_whitespace(titre_ssect)))
        fout.write('<![CDATA[%s]]>' % normalize_whitespace(contenu_sect))
        fout.write('</sousSection>\n')
        j = j + 1
    fout.write('</section>\n')
    i = i + 1
fout.write('</document>')

# Write the result, closing the file even if the write fails.
sortie = open('out.xml', 'w')
try:
    sortie.write(fout.getvalue())
finally:
    sortie.close()

Vous avez filtré cet utilisateur ! Consultez son message

# -*- coding: Latin-1 -*-

from cStringIO import StringIO
import re

def xReplace(t,a,b):
    """Apply a series of substring replacements to ``t``, in order.

    Each ``a[k]`` is replaced by ``b[k]``; later replacements see the result
    of earlier ones.

    Parameters:
        t: the string to transform.
        a: sequence of substrings to look for.
        b: sequence of replacement strings; entries beyond ``len(a)`` are
           ignored.

    Returns:
        The transformed string (``t`` itself when ``a`` is empty).

    Raises:
        IndexError: if ``b`` is shorter than ``a``.
    """
    # Iterative rather than recursive so long replacement lists cannot hit
    # the interpreter's recursion limit.
    for k, old in enumerate(a):
        t = t.replace(old, b[k])
    return t

def remplacer(t,c,f):
    """Rewrite every ``c{...}`` construct in ``t`` using ``f``.

    For each occurrence of the marker ``c``, the text from the marker up to
    the first ``}`` that follows the next ``{`` is replaced by ``f(inner)``,
    where ``inner`` is the text between those braces. Nested braces are not
    understood.

    Parameters:
        t: the text to transform.
        c: the marker string introducing a braced argument.
        f: callable taking the braced text and returning its replacement.

    Returns:
        The transformed text. Text after an unterminated construct is left
        untouched.
    """
    pieces = []
    pos = 0
    # Scan forward only: replacement text is never rescanned, so the loop
    # terminates even when f's output contains the marker again.
    while True:
        i = t.find(c, pos)
        if i < 0:
            break
        j = t.find('{', i)
        if j < 0:
            break
        k = t.find('}', j)
        if k < 0:
            break
        pieces.append(t[pos:i])
        pieces.append(f(t[j + 1:k]))
        pos = k + 1
    pieces.append(t[pos:])
    return ''.join(pieces)

def pardeux(iterable, n=2):
    """Yield successive non-overlapping groups of ``n`` items as tuples.

    Parameters:
        iterable: any iterable.
        n: group size (default 2, i.e. pairs).

    Yields:
        Tuples of exactly ``n`` consecutive items. A trailing group with
        fewer than ``n`` items is dropped.

    Raises:
        ValueError: if ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError('n must be at least 1')
    it = iter(iterable)
    while True:
        group = []
        for item in it:
            group.append(item)
            if len(group) == n:
                break
        if len(group) < n:
            # Input exhausted: end the generator explicitly instead of
            # letting StopIteration escape from the generator body.
            return
        yield tuple(group)

def normalize_whitespace(text):
    """Collapse every run of whitespace in ``text`` to a single space.

    Leading and trailing whitespace is removed.
    """
    words = text.split()
    return ' '.join(words)

# Conversion script: reads a LaTeX FAQ and writes it as XML to out.xml.
# NOTE(review): the backslash escapes below were reconstructed from context
# ('\n', '\\begin', '\1', ...); confirm them against the author's own file.

file_nom = '/home/remi/droit_tice/faq_gest.tex'

# Read the LaTeX source, dropping lines that start with a '%' comment marker.
source = open(file_nom, 'r')
try:
    file_string = ''.join([x for x in source if not x.startswith('%')])
finally:
    source.close()

# Regex capturing everything between \begin{document} and \end{document}.
corps = r"""\\begin{document}(?P<contents>.*)\\end{document}"""
# LaTeX list environments and, at the same index, the HTML tag replacing them.
r0 = ['\\begin{enumerate}', '\\end{enumerate}',
      '\\begin{itemize}', '\\end{itemize}']
r1 = ['<ol>', '</ol>', '<ul>', '</ul>']

match = re.compile(corps, re.IGNORECASE | re.DOTALL).search(file_string)
if match is None:
    raise ValueError('no \\begin{document}...\\end{document} body found in %s'
                     % file_nom)
contents = match.group('contents')
contents = xReplace(contents, r0, r1)
contents = remplacer(contents, '\\url',
                     lambda x: '\n<a href = "' + x + '">' + x + '</a>')
# Each single-line \item becomes an HTML list element.
contents = re.compile(r'\\item (.*?)\n').sub(r'<li>\1</li>\n', contents)
# Backslash-escape apostrophes (presumably for the later SQL step -- confirm).
contents = contents.replace("'", "\\'")
contents = remplacer(contents, '\\textit', lambda x: '\n<i>' + x + '</i>')

# Splitting on a pattern with a capturing group interleaves titles and
# bodies: [introduction, title1, body1, title2, body2, ...].
contents_list = re.compile(r'\\section{(.*)}').split(contents)
subsection_re = re.compile(r'\\subsection{(.*)}')

fout = StringIO()
fout.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
fout.write('\n')
fout.write('<document>\n')
fout.write('<introduction>%s</introduction>\n'
           % normalize_whitespace(contents_list[0]))

# i numbers the sections; j numbers the subsections across the whole
# document (it is not reset for each section).
i = j = 1
for titre, contenu in pardeux(contents_list[1:]):
    contenus_sections = subsection_re.split(contenu)
    # NOTE(review): titles and descriptions are written into attribute
    # values without XML escaping.
    fout.write('<section id="%s" name="%s" description="%s">\n'
               % (i, titre, normalize_whitespace(contenus_sections[0])))
    for titre_ssect, contenu_sect in pardeux(contenus_sections[1:]):
        fout.write('<sousSection id="%s" thema="%s">'
                   % (j, normalize_whitespace(titre_ssect)))
        fout.write('<![CDATA[%s]]>' % normalize_whitespace(contenu_sect))
        fout.write('</sousSection>\n')
        j = j + 1
    fout.write('</section>\n')
    i = i + 1
fout.write('</document>')

# Write the result, closing the file even if the write fails.
sortie = open('out.xml', 'w')
try:
    sortie.write(fout.getvalue())
finally:
    sortie.close()

remi

23/05/2005 à 21:34

Bonjour,

# -*- coding: Latin-1 -*-
[...]

Merci beaucoup pour toutes ces remarques !!
J'étudie tout ça...
Bonne soirée !
@+
Rémi.

remi

23/05/2005 à 21:46

Bonjour,

# -*- coding: Latin-1 -*-

contents = re.compile(corps, re.IGNORECASE|
re.DOTALL).search(file_string).group('contents')

Un joli "one-liner" ;-) Comme il y en a plusieurs.
Il ne manque plus qu'une écriture plus OO, comme un autre intervenant l'a
proposé.
:-)
@+ et merci !
Vive fclp !
Rémi

LaTeX->XML->SQL - critique

3 réponses

Veuillez sélectionner un problème