intimate/extractor/openrec_extractor/openrec_test.go

97 lines
1.6 KiB
Go
Raw Normal View History

2020-07-16 10:31:13 +00:00
package main
import (
"io/ioutil"
"os"
"regexp"
"testing"
"time"
"github.com/lestrrat-go/libxml2"
)
// TestCase0 reads ./test.html and logs every submatch produced by the
// TagButton__Button regular expression.
//
// The matches are diagnostic output only, so they are reported with t.Log
// (visible under `go test -v`) instead of t.Error, which would mark the test
// as failed on every run. An unreadable fixture fails the test via t.Fatal
// rather than panicking and aborting the whole test binary.
func TestCase0(t *testing.T) {
	f, err := os.Open("./test.html")
	if err != nil {
		t.Fatal(err)
	}
	defer f.Close()

	data, err := ioutil.ReadAll(f)
	if err != nil {
		t.Fatal(err)
	}

	// Group 1 captures between 1 and 100 characters that follow the end of
	// the tag containing "TagButton__Button" and precede a "</a".
	tagRe := regexp.MustCompile(`TagButton__Button[^>]+>(.{1,100})</a`)
	matches := tagRe.FindAllStringSubmatch(string(data), -1)
	t.Log(matches)
}
// TestCase1 verifies that an RFC 3339 timestamp carrying a +09:00 offset is
// parsed to the expected instant.
//
// The current time and the local/UTC renderings of the parsed value are
// logged for inspection (visible under `go test -v`); only a parse error or a
// wrong instant fails the test.
func TestCase1(t *testing.T) {
	date := "2020-07-13T18:58:24+09:00"
	tm, err := time.Parse(time.RFC3339, date)
	if err != nil {
		t.Fatalf("parsing %q: %v", date, err)
	}

	// 18:58:24 at +09:00 is 09:58:24 UTC on the same day.
	want := time.Date(2020, time.July, 13, 9, 58, 24, 0, time.UTC)
	if !tm.Equal(want) {
		t.Errorf("parsed %q as %v, want %v", date, tm.UTC(), want)
	}

	t.Log(time.Now())
	t.Log(tm.Local().UTC(), tm.Local())
}
// TestCase2 verifies that two "H:MM:SS" strings can be turned into a
// time.Duration by parsing both as clock times and subtracting them.
//
// Each parse error is checked on its own so that a failure of the first parse
// is not masked by the second. Because the strings are parsed as clock times,
// this technique only works while the hour field is below 24.
func TestCase2(t *testing.T) {
	const layout = "15:04:05"
	duration1 := "0:00:00"
	duration2 := "4:56:04"

	tm1, err := time.Parse(layout, duration1)
	if err != nil {
		t.Fatalf("parsing %q: %v", duration1, err)
	}
	tm2, err := time.Parse(layout, duration2)
	if err != nil {
		t.Fatalf("parsing %q: %v", duration2, err)
	}

	got := tm2.Sub(tm1)
	want := 4*time.Hour + 56*time.Minute + 4*time.Second
	if got != want {
		t.Errorf("%q - %q = %v, want %v", duration2, duration1, got, want)
	}
	t.Log(got)
}
// TestCase parses ./test.html with libxml2, appends a <META charset="utf-8">
// element to the first node matched by /html/head, and then runs an XPath
// query for the h1 whose class contains "MovieTitle__Title".
//
// Intermediate values are logged with t.Log (visible under `go test -v`).
// Every error is checked where it is produced and reported through the
// testing API, and the file, document and XPath results are released when the
// test returns.
func TestCase(t *testing.T) {
	f, err := os.Open("./test.html")
	if err != nil {
		t.Fatal(err)
	}
	defer f.Close()

	data, err := ioutil.ReadAll(f)
	if err != nil {
		t.Fatal(err)
	}

	doc, err := libxml2.ParseHTML(data)
	if err != nil {
		t.Fatal(err)
	}
	defer doc.Free()

	// doc.CreateElement("meta")
	// "<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">"
	xresult, err := doc.Find("/html/head")
	if err != nil {
		// Checked before err is reused below; otherwise a failed query would
		// leave xresult unusable when NodeIter is called.
		t.Fatal(err)
	}
	defer xresult.Free()

	ele, err := doc.CreateElement(`META`)
	if err != nil {
		t.Fatal(err)
	}
	if err := ele.SetAttribute("charset", "utf-8"); err != nil {
		t.Fatal(err)
	}

	// Only the first matched node receives the new element; if nothing
	// matched, the insertion is skipped.
	iter := xresult.NodeIter()
	if iter.Next() {
		n := iter.Node()
		// childs, err := n.ChildNodes()
		if err := n.AddChild(ele); err != nil {
			t.Error(err)
		}
		t.Log(n)
	}

	xr, err := doc.Find("//h1[ contains(@class, 'MovieTitle__Title')]")
	if err != nil {
		t.Fatal(err)
	}
	defer xr.Free()
	t.Log(xr)
}
// TestExtractor allocates a zero-value OpenrecExtractor and calls its Execute
// method. Nothing returned or changed by Execute is inspected here.
func TestExtractor(t *testing.T) {
	extractor := new(OpenrecExtractor)
	extractor.Execute()
}