view syllabus/getSyllabus.go @ 6:fca852bfd500

...
author anatofuz <anatofuz@cr.ie.u-ryukyu.ac.jp>
date Tue, 31 Mar 2020 15:12:16 +0900
parents a0d23f38344d
children 514dc6c6a683
line wrap: on
line source

package syllabus

import (
	"bufio"
	"context"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"os"
	"path"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"github.com/pkg/errors"
)

// LectureDay describes when a lecture is held during the week.
//
// DayOfWeek is 0 (月, Monday) through 4 (金, Friday); kanjiday2int in this
// file yields -1 for any other input, so -1 marks an unrecognised day.
// Period is the first period of the lecture and LastPeriod the final one
// (periods are expected to be 1~6). The parser in this file stores -1 in
// LastPeriod when the syllabus lists a single period only.
type LectureDay struct {
	DayOfWeek  int
	Period     int
	LastPeriod int
}

// Lecture holds the fields extracted from one syllabus page.
//
// ID is the lecture code (e.g. 600625001), Name is the lecture title
// (e.g. プログラミング1, "Programming 1"), Day is the weekday/period slot as a
// LectureDay, and Teacher is the staff name shown on the page.
type Lecture struct {
	ID      string
	Name    string
	Day     LectureDay
	Teacher string
}

// LectureWPath pairs a lecture ID with the path of the syllabus HTML file
// for that lecture on the local filesystem.
type LectureWPath struct {
	ID   string // lecture code, copied into Lecture.ID by the parser
	Path string // location of the downloaded <ID>.html file
}

// CreateGetSyllabus is the constructor for GetSyllabus. It fills in the
// academic year, the term and the output directory ("<year>/<term>").
//
// NOTE(review): the year is pinned to 2019 and the term to "previous"; the
// term derived from the current month is computed but then overwritten, so
// the current time has no effect on the result at the moment.
func CreateGetSyllabus() *GetSyllabus {
	now := time.Now()

	// Months before July map to the first term, the rest to the second one.
	term := "latter"
	if now.Month() < 7 {
		term = "previous"
	}
	// Pinned value; this overrides whatever was derived from the month above.
	term = "previous"

	gs := new(GetSyllabus)
	gs.year = 2019 // pinned; now.Year() is not used
	gs.term = term
	gs.outputdir = filepath.Join(strconv.Itoa(gs.year), gs.term)
	return gs
}

var (
	// Markers searched for in the syllabus HTML. Each one is the tail of a
	// <span id="..."> opening tag, so the wanted text starts right after it.
	dayPeriodID   = "ctl00_phContents_Detail_lbl_day_period\">"
	lectureNameID = "ctl00_phContents_Detail_lbl_lbl_lct_name_double\">"
	teacherNameID = "ctl00_phContents_Detail_lbl_syl_staff_name_double\">"

	// endSpan is the closing tag that terminates the extracted text.
	endSpan = "</span>"

	// dayOfWeeklen is the byte length of a single kanji weekday such as "月".
	dayOfWeeklen = len("月")

	// endpoint is the scheme and host of the syllabus server. A full request looks like
	// "https://tiglon.jim.u-ryukyu.ac.jp/portal/Public/Syllabus/SyllabusSearchStart.aspx?lct_year=2019&lct_cd=610004071&je_cd=1"
	endpoint = "https://tiglon.jim.u-ryukyu.ac.jp"
)

// CheckAndMkdirBuilddir makes sure the output directory (g.outputdir, e.g.
// "2019/previous") exists, creating it and any missing parents with mode 0755.
//
// It returns true when the directory exists on return. On failure it returns
// false together with the wrapped os.MkdirAll error.
func (g *GetSyllabus) CheckAndMkdirBuilddir() (bool, error) {
	// Only a successful Stat yields a usable FileInfo. For every other
	// outcome (missing path, permission error, path is a regular file) fall
	// through to MkdirAll and let it either create the directory or report
	// the problem, instead of dereferencing a nil FileInfo.
	if info, err := os.Stat(g.outputdir); err == nil && info.IsDir() {
		return true, nil
	}

	if err := os.MkdirAll(g.outputdir, 0755); err != nil {
		return false, errors.Wrap(err, "failed mkdir")
	}
	return true, nil
}

// LecIDStoDonwlodSyllabus downloads the syllabus page of every lecture ID
// concurrently (one goroutine per ID) and returns the ID/path pairs in
// completion order, which is not necessarily the order of lectureIDs.
//
// If any download fails, the first error observed is returned and the result
// is nil. If ctx is cancelled before all downloads have reported back,
// ctx.Err() is returned. A nil ctx is treated as context.Background().
func (g *GetSyllabus) LecIDStoDonwlodSyllabus(ctx context.Context, lectureIDs []string) (*[]LectureWPath, error) {
	if ctx == nil {
		ctx = context.Background()
	}

	type result struct {
		lwp LectureWPath
		err error
	}

	// The buffer holds one slot per worker, so every goroutine can deliver
	// its result and exit even when this function has already returned early.
	ch := make(chan result, len(lectureIDs))
	for _, id := range lectureIDs {
		go func(id string) {
			outputPath, err := g.LecIDtoDownloadSyllabus(id)
			ch <- result{
				lwp: LectureWPath{ID: id, Path: outputPath},
				err: err,
			}
		}(id)
	}

	var lwps []LectureWPath
	for range lectureIDs {
		select {
		case <-ctx.Done():
			return nil, ctx.Err()
		case r := <-ch:
			if r.err != nil {
				return nil, fmt.Errorf("download syllabus %s: %w", r.lwp.ID, r.err)
			}
			lwps = append(lwps, r.lwp)
		}
	}
	return &lwps, nil
}

// LecIDtoDownloadSyllabus downloads the syllabus page for lectureID into
// "<g.outputdir>/<lectureID>.html" and returns that path.
//
// If the file already exists it is treated as a cache hit and returned
// without any network access. On any failure the returned path is empty and
// no partially written file is left behind, so a later call retries the
// download instead of picking up a broken cache entry.
func (g *GetSyllabus) LecIDtoDownloadSyllabus(lectureID string) (string, error) {
	outputPath := filepath.Join(g.outputdir, lectureID+".html")

	// Already downloaded earlier: reuse it.
	if _, err := os.Stat(outputPath); err == nil {
		return outputPath, nil
	}

	u, err := url.Parse(endpoint)
	if err != nil {
		return "", err
	}

	u.Path = path.Join(u.Path, "portal", "Public", "Syllabus", "SyllabusSearchStart.aspx")
	q := u.Query()
	q.Set("lct_year", strconv.Itoa(g.year))
	q.Set("lct_cd", lectureID)
	q.Set("je_cd", "1")
	u.RawQuery = q.Encode()

	fmt.Println(u.String())
	res, err := http.Get(u.String())
	if err != nil {
		return "", errors.Wrap(err, "failed download html")
	}
	// res is only non-nil once the error check has passed.
	defer res.Body.Close()

	// Do not store an error page as if it were the syllabus.
	if res.StatusCode != http.StatusOK {
		return "", errors.Errorf("failed download html: unexpected status %s", res.Status)
	}

	// Create the file only after the request succeeded, so a failed request
	// cannot leave an empty file that looks like a cached page.
	file, err := os.Create(outputPath)
	if err != nil {
		return "", errors.Wrap(err, "failed create html...")
	}

	if _, err := io.Copy(file, res.Body); err != nil {
		file.Close()
		os.Remove(outputPath) // best effort: drop the truncated file
		return "", errors.Wrap(err, "failed download html")
	}
	// Close explicitly: a write error may only surface here.
	if err := file.Close(); err != nil {
		os.Remove(outputPath) // best effort: drop the possibly incomplete file
		return "", errors.Wrap(err, "failed write html")
	}

	return outputPath, nil
}

// LecIDwFilePath2LectureStruct parses the syllabus HTML file at lwp.Path and
// returns a Lecture whose ID is lwp.ID.
//
// The file is scanned line by line for three markers:
//   - dayPeriodID:   weekday and period, e.g. "月3" (Monday, 3rd period) or
//     "月3〜4" (Monday, 3rd to 4th period). LastPeriod is -1 when only a
//     single period is given.
//   - lectureNameID: lecture name.
//   - teacherNameID: teacher name; scanning stops at this line.
//
// The value is the text between the marker and the next "</span>" on the
// same line. An error is returned when the file cannot be opened or read, or
// when the day/period text does not have the expected shape.
func (g *GetSyllabus) LecIDwFilePath2LectureStruct(lwp *LectureWPath) (*Lecture, error) {
	file, err := os.Open(lwp.Path)
	if err != nil {
		return nil, errors.Wrap(err, "failed open html file")
	}
	// Closed on every return path, including the parse errors below.
	defer file.Close()

	scanner := bufio.NewScanner(file)
	// Allow lines up to 1 MiB; the default limit is 64 KiB.
	scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)

	// textAfter extracts the text between marker and the first endSpan that
	// follows it. found reports whether marker occurs in line, closed whether
	// the closing tag was found after it. Searching only behind the marker
	// avoids an inverted slice when another "</span>" precedes it.
	textAfter := func(line, marker string) (text string, found, closed bool) {
		i := strings.Index(line, marker)
		if i < 0 {
			return "", false, false
		}
		rest := line[i+len(marker):]
		j := strings.Index(rest, endSpan)
		if j < 0 {
			return "", true, false
		}
		return rest[:j], true, true
	}

	var lec Lecture
	lec.ID = lwp.ID

	for scanner.Scan() {
		line := scanner.Text()

		// Day and period. A marker without a closing tag falls through to
		// the checks below.
		if day, _, ok := textAfter(line, dayPeriodID); ok {
			// Need at least one weekday kanji plus one period digit.
			if len(day) < dayOfWeeklen+1 {
				return nil, errors.Errorf("failed convert day: unexpected value %q", day)
			}

			lec.Day.DayOfWeek = kanjiday2int(day[:dayOfWeeklen])
			lec.Day.Period, err = strconv.Atoi(day[dayOfWeeklen : dayOfWeeklen+1])
			if err != nil {
				return nil, errors.Wrap(err, "failed convert day")
			}

			// Exactly kanji + digit (e.g. 月3, 火2): single period.
			if len(day) == dayOfWeeklen+1 {
				lec.Day.LastPeriod = -1
				continue
			}

			// Range form: skip the first digit and "〜" (1 + 3 bytes).
			if len(day) < dayOfWeeklen+4 {
				return nil, errors.Errorf("failed convert day: unexpected value %q", day)
			}
			lec.Day.LastPeriod, err = strconv.Atoi(day[dayOfWeeklen+4:])
			if err != nil {
				return nil, errors.Wrap(err, "failed convert day")
			}
			continue
		}

		// Lecture name.
		if name, found, ok := textAfter(line, lectureNameID); found {
			if ok {
				lec.Name = name
			}
			continue
		}

		// Teacher name: this is the last field needed, so stop scanning.
		if teacher, found, ok := textAfter(line, teacherNameID); found {
			if ok {
				lec.Teacher = teacher
			}
			break
		}
	}

	if err := scanner.Err(); err != nil {
		return nil, errors.Wrap(err, "failed read html file")
	}
	return &lec, nil
}

// kanjiday2int maps a kanji weekday to its index: 月 (Mon) -> 0, 火 (Tue) -> 1,
// 水 (Wed) -> 2, 木 (Thu) -> 3, 金 (Fri) -> 4. Any other input yields -1.
func kanjiday2int(kanjiDay string) int {
	weekdays := [...]string{"月", "火", "水", "木", "金"}
	for idx, name := range weekdays {
		if name == kanjiDay {
			return idx
		}
	}
	return -1
}