From 24bd0266242e221448ef398568796bd956d9b801 Mon Sep 17 00:00:00 2001
From: Tormod Volden <debian.tormod@gmail.com>
Date: Sat, 17 Aug 2024 19:50:12 +0200
Subject: [PATCH] doc: Port from htmllib to html.parser HTMLParser

htmllib was deprecated in Python 2.6 and removed in Python 3.0.

Rather than handling non-breaking spaces (&nbsp;) in the HTMLParser,
convert them to regular spaces in parse_asoundlib_api().

At the same time adjust to the current ALSA API web page format.

Closes: https://github.com/alsa-project/alsa-python/pull/15
Signed-off-by: Tormod Volden <debian.tormod@gmail.com>
Signed-off-by: Jaroslav Kysela <perex@perex.cz>
---
 doc/APICoverage.py | 60 ++++++++++++++++++++++++----------------------
 1 file changed, 31 insertions(+), 29 deletions(-)

diff --git a/doc/APICoverage.py b/doc/APICoverage.py
index fcb69dd..02adfac 100644
--- a/doc/APICoverage.py
+++ b/doc/APICoverage.py
@@ -25,7 +25,7 @@
 import os, pickle, urllib.request, urllib.parse, urllib.error, sys
 from pyparsing import *
 from html.entities import entitydefs
-from htmllib import HTMLParser
+from html.parser import HTMLParser
 from formatter import AbstractFormatter, DumbWriter
 
 # cache dir (preparsed source and HTML asoundlib API)
@@ -114,29 +114,24 @@ class AsoundlibAPIHTMLParser(HTMLParser):
     HTML asoundlib API from the alsa website.
     """
     
-    HTMLParser.entitydefs['nbsp'] = ' '
-    
     def __init__(self, name, data):
-        f = AbstractFormatter(DumbWriter(open(name, 'w'), 100))
-        HTMLParser.__init__(self, f)
+        self.f = AbstractFormatter(DumbWriter(open(name, 'w'), 100))
+        HTMLParser.__init__(self)
         self.feed(data)
         self.close()
 
-    def start_h1(self, attrs):
-        HTMLParser.start_h1(self, attrs)
-        self.handle_data("--- titlestart")
-        self.do_br(None)
-
-    def start_table(self, attrs):
-        if len(attrs) == 1 and attrs[0] == ("class", "memname"):
-            self.handle_data("--- itemstart")
-            self.do_br(None)
+    def handle_data(self, data):
+        self.f.add_literal_data(data)
 
-    def start_tr(self, attrs):
-        self.do_br(None)
-
-    def anchor_end(self):
-        pass
+    def handle_starttag(self, tag, attrs):
+        if tag == "div":
+            if len(attrs) == 1 and attrs[0] == ("class", "title"):
+                self.handle_data("\n--- titlestart\n")
+            if len(attrs) == 1 and attrs[0] == ("class", "ingroups"):
+                self.handle_data("\n\n")
+        elif tag == 'table':
+            if len(attrs) == 1 and attrs[0] == ("class", "memname"):
+                self.handle_data("\n--- itemstart")
 
 def parse_asoundlib_api(lines):
     """
@@ -154,7 +149,8 @@ def parse_asoundlib_api(lines):
     comment = ""
     enumsublist = []
     for line in lines:
-        line = line[:-1]
+        # convert &nbsp; to space
+        line = line[:-1].replace('\xa0', ' ')
         if False:
             if id(current) == id(defines):
                 print("defines   ", end=' ')
@@ -168,7 +164,7 @@ def parse_asoundlib_api(lines):
                 print("          ", end=' ')
             print("%s %d %s" % (id(current), state, line))
 
-        if line.startswith('Define Documentation'):
+        if line.startswith('Macro Definition Documentation'):
             current = defines
             state = 0
         elif line.startswith('Typedef Documentation'):
@@ -185,36 +181,42 @@ def parse_asoundlib_api(lines):
         elif line.startswith('--- titlestart'):
             state = 5
         elif state == 5:
-            title = line
+            title = line.strip()
             state = 0
         elif current == None:
             continue
         elif state == 1:
             if line == "":
+                name = ' '.join(name.split())
                 state = 2
             else:
                 name += line
-        elif state == 2:
+        elif state == 2 and line != "":
+            comment = line.strip()
             if id(current) == id(enums):
                 state = 3
             else:
-                comment = line
                 current.append((name, comment))
                 name = ""
                 comment = ""
                 state = 0
-        elif state == 3 and line.startswith('Enumerator:'):
+        elif state == 3 and line.startswith('Enumerator'):
+            enum, subcomment = line[10:].split(' ', 1)
+            enumsublist = [(enum.strip(), subcomment.strip())]
+            linewasempty = False
             state = 4
-            enumsublist = []
         elif state == 4:
-            if line == "":
+            if linewasempty and line == "":
                 current.append((name, comment, enumsublist))
                 name = ""
                 comment = ""
                 state = 0
+            elif line == "":
+                linewasempty = True
             else:
                 enum, subcomment = line.split(' ', 1)
-                enumsublist.append((enum, subcomment))
+                enumsublist.append((enum.strip(), subcomment.strip()))
+                linewasempty = False
 
     return (title, defines, typedefs, enums, functions)
         
@@ -353,7 +355,7 @@ def print_api_coverage(urls, look_constant, look_usage, excludes):
                 name = names[-1]
                 if ')' in name:
                     names = d[0].split('(')
-                    name = names[-2].split()[-1]
+                    name = names[-2].removesuffix(') ').removeprefix('* ')
                 print_name(d[0], d[1], name, look_constant, look_usage, el)
             print_stat(title, "Typedefs")
             print("\n"*2)
-- 
2.47.3