import pandas as pd

This article is also available as a Jupyter Notebook that can be run from top to bottom. Code snippets are included throughout, and you can run them in any environment.

Below are the versions of fastai, fastcore, wwf, fastaudio, and torchaudio currently running at the time of writing this:

  • fastai : 2.1.5
  • fastcore : 1.3.4
  • wwf : 0.0.16
  • fastaudio : 0.1.3
  • torchaudio : 0.9.0

import pkg_resources
# Monkey-patch `pkg_resources.get_distribution` to always report a missing
# distribution. This tricks the fastai/fastaudio pinned-dependency version
# checks into skipping validation so the imports below succeed in this
# environment. Intentional hack — remove once versions are compatible.
def placeholder(x):
    raise pkg_resources.DistributionNotFound
pkg_resources.get_distribution = placeholder
import fastai
import fastaudio
from fastai.vision.all import *
from fastaudio.core.all import *
from fastaudio.augment.all import *
# Load the label table, indexed by the audio file name (`path`).
# Columns: pattern (pitch-accent class), kana, syl (syllable count),
# drop (accent-drop position).
data = pd.read_csv("comma labels.csv", index_col='path')
data
pattern kana syl drop
path
ある.yomi000142BB_0596.mp3 頭高 アル 2 1
ある.yomi000142BB_0596.mp3 頭高 アル 2 1
思う.yomi0006C617_043A.mp3 中高 オモウ 3 2
など.yomi000240B7_0028.mp3 頭高 ナド 2 1
私.yomi00092F63_0072.mp3 平板 ワタくシ 4 0
... ... ... ... ...
くも膜下出血_蜘蛛膜下出血.yomi0001AAD1_0622.mp3 中高 クモマッカしュッケツ 10 6
捜す.yomi00072507_0088.mp3 平板 サカ゚ス 4 0
捜し物.yomi000724FD_0424.mp3 平板 サカ゚シモノ 6 0
あこや貝_阿古屋貝.yomi00013767_0114.mp3 中高 アコヤカ゚イ 6 3
あこや貝_阿古屋貝.yomi00013767_0114.mp3 中高 アコヤカ゚イ 6 3

74191 rows × 4 columns

# This path occurs twice in the index, so `.loc` returns both rows.
data.loc['1すくい_一掬い.yomi000120BB_01C8.mp3']
pattern kana syl drop
path
1すくい_一掬い.yomi000120BB_01C8.mp3 中高 ひトすクイ 5 2
1すくい_一掬い.yomi000120BB_01C8.mp3 中高 ひトすクイ 5 2
# Unique path: `.loc` returns the single matching row as a Series.
data.loc['出し抜く.yomi0004BDF8_06F2.mp3']
pattern      平板
kana       ダシヌク
syl           4
drop          0
Name: 出し抜く.yomi0004BDF8_06F2.mp3, dtype: object
# Another duplicated path — both (identical) rows come back.
data.loc['ある.yomi000142BB_0596.mp3']
pattern kana syl drop
path
ある.yomi000142BB_0596.mp3 頭高 アル 2 1
ある.yomi000142BB_0596.mp3 頭高 アル 2 1
# Colab-only: mount Google Drive to reach the audio sample files.
from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
# Point at the 1000-file sample set on Drive and sanity-check that one
# file loads as an AudioTensor (`.show()` plots the waveform).
path = Path('/content/drive/MyDrive')
path = path / '1000sample'
at = AudioTensor.create(path.ls()[0])
at.show()
<matplotlib.axes._subplots.AxesSubplot at 0x7f7a5b38aed0>
# Voice preset: spectrogram settings intended for speech audio.
cfg = AudioConfig.Voice()
aud2spec = AudioToSpec.from_cfg(cfg)
aud2spec(at).show()
<matplotlib.axes._subplots.AxesSubplot at 0x7f7a5b50f950>
# ResizeSignal takes a duration in milliseconds: crop/pad clips to 2000 ms
# (2 s), hence the name `crop2s`.
crop2s = ResizeSignal(2000)
crop2s(at).show()
<matplotlib.axes._subplots.AxesSubplot at 0x7f7a5b50f5d0>
# Full item pipeline: load file -> crop to 2 s -> spectrogram.
# Preview the first five items.
pipe = Pipeline([AudioTensor.create, crop2s, aud2spec])
for i in range(5):
  pipe(path.ls()[i]).show()
 
# NOTE(review): this path appears twice in the index, so this chained
# lookup should return a Series of both (identical) values rather than a
# scalar — the scalar output below suggests notebook state differed; verify.
data.loc['ある.yomi000142BB_0596.mp3']['pattern']
'頭高'
# Export fully-duplicated rows and duplicated-path rows to CSV for manual
# inspection, then count exact duplicates.
data.reset_index()[data.reset_index().duplicated(keep=False)].to_csv('duplicates.csv')
data[data.index.duplicated(keep=False)].to_csv('path_dup.csv')
data.duplicated().sum()
15695
# Preview only: show `path` moved out of the index into a regular column.
data.reset_index()
path pattern kana syl drop
0 ある.yomi000142BB_0596.mp3 頭高 アル 2 1
1 ある.yomi000142BB_0596.mp3 頭高 アル 2 1
2 思う.yomi0006C617_043A.mp3 中高 オモウ 3 2
3 など.yomi000240B7_0028.mp3 頭高 ナド 2 1
4 私.yomi00092F63_0072.mp3 平板 ワタくシ 4 0
... ... ... ... ... ...
74186 くも膜下出血_蜘蛛膜下出血.yomi0001AAD1_0622.mp3 中高 クモマッカしュッケツ 10 6
74187 捜す.yomi00072507_0088.mp3 平板 サカ゚ス 4 0
74188 捜し物.yomi000724FD_0424.mp3 平板 サカ゚シモノ 6 0
74189 あこや貝_阿古屋貝.yomi00013767_0114.mp3 中高 アコヤカ゚イ 6 3
74190 あこや貝_阿古屋貝.yomi00013767_0114.mp3 中高 アコヤカ゚イ 6 3

74191 rows × 5 columns

# Make `path` a regular column so the duplicate checks below can use it.
data = data.reset_index()

Removing complete duplicates (rows identical across every column)

# Drop rows that are exact duplicates across every column, keeping the
# first occurrence of each.
# FIX: the original negated the boolean mask with unary minus
# (`-data.duplicated()`); pandas deprecated and removed `-` for boolean
# Series — `~` is the supported negation operator.
data = data[~data.duplicated()]
data.to_csv('labels complete duplicates removed.csv')

Removing paths that appear with duplicate (potentially conflicting) labels

# Count rows whose `path` occurs more than once (all occurrences counted).
data.duplicated(subset=['path'], keep=False).sum()
687
# Drop *every* row whose path is duplicated (keep=False marks all copies):
# the same file carrying more than one label row cannot be trusted.
# FIX: use `~` to negate the boolean mask — unary `-` on a boolean Series
# was deprecated and then removed in pandas.
data = data[~data.duplicated(subset=['path'], keep=False)]
data.to_csv('labels_no_duplicates.csv')
# Look up the pattern Series for a single path, then pull out the scalar.
# FIX: the original assigned `.values[0]` (a plain string) to `sr` and then
# called `sr.values[0]` again, which raises AttributeError on a str.
# Assign the Series and extract the scalar once.
sr = data[data.path == '捜す.yomi00072507_0088.mp3']['pattern']
sr.values[0]
'平板'
# Per-item transforms: crop/pad to 1000 ms, then convert to spectrogram.
# NOTE(review): this list appears unused — the DataBlock below passes
# [crop2s, aud2spec] (2 s crop) instead; confirm which was intended.
item_tfms = [ResizeSignal(1000), aud2spec]
def get_label(fn):
  "Return the pitch-accent `pattern` recorded for audio file name `fn` in the global `data` frame."
  matches = data.loc[data.path == fn, 'pattern']
  # IndexError here means `fn` has no label row.
  return matches.values[0]
# DataBlock: spectrogram inputs, pitch-accent pattern targets; labels are
# looked up from each file's name via `get_label`.
# NOTE(review): item_tfms here is [crop2s, aud2spec] (2 s crop), not the
# `item_tfms` list defined above (1 s crop) — confirm which was intended.
audpitch = DataBlock(blocks=(AudioBlock, CategoryBlock),  
                 get_items=get_audio_files, 
                 splitter=RandomSplitter(),
                 item_tfms = [crop2s, aud2spec],
                 get_y=using_attr(get_label, 'name'))
# Delete every downloaded audio file that has no label row in `data`, so
# the DataBlock never encounters an unlabelled item.
# FIX: the original used a bare `except:`, which also swallows
# KeyboardInterrupt/SystemExit and any unrelated bug — and then DELETED the
# file. Catch only IndexError, which is what `get_label` raises when no
# row matches the file name.
for fn in path.ls():
  print(fn)
  try:
    get_label(fn.name)
  except IndexError:
    fn.unlink()
# Sanity pass: after pruning, every remaining file must resolve to a label.
for audio_file in path.ls():
  print(audio_file)
  print(get_label(audio_file.name))
# NOTE(review): `dls` is not defined anywhere in the visible code —
# presumably `dls = audpitch.dataloaders(path)` ran in a cell not shown;
# confirm before re-running top to bottom.
learn = Learner(dls, xresnet18(), CrossEntropyLossFlat(), metrics=accuracy)
# Channel count of one input batch (spectrograms here are single-channel).
n_c = dls.one_batch()[0].shape[1]; n_c
1
def alter_learner(learn, n_channels=1):
  "Adjust `learn`'s first conv layer to accept `n_channels` input channels"
  # First conv of an xresnet lives at model[0][0].
  layer = learn.model[0][0]
  layer.in_channels = n_channels
  # Keep the pretrained weights of input channel 1 (as the original did),
  # replicating them when more than one channel is requested.
  # FIX: the original ignored `n_channels` — `unsqueeze(1)` always produced
  # a 1-channel weight tensor, breaking any call with n_channels > 1.
  new_weight = layer.weight[:, 1:2, :, :]
  if n_channels > 1:
    new_weight = new_weight.expand(-1, n_channels, -1, -1).contiguous()
  layer.weight = nn.Parameter(new_weight)
  learn.model[0][0] = layer
# Shrink the first conv to the actual channel count, then run the
# learning-rate finder.
alter_learner(learn, n_c)
learn.lr_find()
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py:718: UserWarning: Named tensors and all their associated APIs are an experimental feature and subject to change. Please do not use them for anything important until they are released as stable. (Triggered internally at  /pytorch/c10/core/TensorImpl.h:1156.)
  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
SuggestedLRs(lr_min=0.03019951581954956, lr_steep=0.00363078061491251)
# Initial training: 5 one-cycle epochs at lr=1e-3.
learn.fit_one_cycle(5, 1e-3)
epoch train_loss valid_loss accuracy time
0 0.250854 0.228218 0.937931 00:12
1 0.246093 1.324811 0.544828 00:12
2 0.241305 0.378529 0.882759 00:12
3 0.232387 0.211727 0.951724 00:12
4 0.225047 0.219429 0.965517 00:12
# Continue training at a lower learning rate.
learn.fit_one_cycle(5, 1e-4)
epoch train_loss valid_loss accuracy time
0 0.204818 0.208217 0.958621 00:12
1 0.204138 0.206059 0.951724 00:12
2 0.202360 0.217017 0.958621 00:12
3 0.198590 0.214045 0.958621 00:12
4 0.198357 0.220390 0.944828 00:12
# Compare the label distribution of the 1000-file sample against the full
# dataset (full-data counts follow below).
train = dls.train
m = learn.model
# NOTE(review): `dls[0]` appears to be the train DataLoader, not targets —
# the name `y` looks misleading; confirm intent.
y = dls[0]
names = [fn.name for fn in path.ls()]
datasample =  data[data.path.isin(names)]
datasample.groupby('pattern').count()
path kana syl drop
pattern
中高 229 229 229 229
尾高 18 18 18 18
平板 403 403 403 403
頭高 78 78 78 78
# Full-dataset label distribution, for comparison with the sample above.
data.groupby('pattern').count()
path kana syl drop
pattern
中高 24444 24444 24444 24444
尾高 1461 1461 1461 1461
平板 30551 30551 30551 30551
頭高 10038 10038 10038 10038